aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2013-11-13 01:57:54 +0000
committerJuergen Ributzka <juergen@apple.com>2013-11-13 01:57:54 +0000
commitc7e77f91fecd662b198939a9a8ee0a0cc3828fc4 (patch)
tree10b8c447404b770e3ab436452f5fedafebedc151
parentcfe36cb02a4c2d99fd2ea0892e7b2668f2df2b8b (diff)
downloadexternal_llvm-c7e77f91fecd662b198939a9a8ee0a0cc3828fc4.zip
external_llvm-c7e77f91fecd662b198939a9a8ee0a0cc3828fc4.tar.gz
external_llvm-c7e77f91fecd662b198939a9a8ee0a0cc3828fc4.tar.bz2
SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too.
This patch reapplies r193676 with an additional fix for the Hexagon backend. The SystemZ backend has already been fixed by r194148. The Type Legalizer recognizes that VSELECT needs to be split, because the type is to wide for the given target. The same does not always apply to SETCC, because less space is required to encode the result of a comparison. As a result VSELECT is split and SETCC is unrolled into scalar comparisons. This commit fixes the issue by checking for VSELECT-SETCC patterns in the DAG Combiner. If a matching pattern is found, then the result mask of SETCC is promoted to the expected vector mask type for the given target. Now the type legalizer will split both VSELECT and SETCC. This allows the following X86 DAG Combine code to sucessfully detect the MIN/MAX pattern. This fixes PR16695, PR17002, and <rdar://problem/14594431>. Reviewed by Nadav git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194542 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp23
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp21
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h7
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp11
-rw-r--r--test/CodeGen/X86/vec_split.ll42
5 files changed, 93 insertions, 11 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7343236..90cd1d3 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4364,6 +4364,29 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // Treat SETCC as a vector mask and promote the result type based on the
+ // targets expected SETCC result type. This will ensure that SETCC and VSELECT
+ // are both split by the type legalizer. This is done to prevent the type
+ // legalizer from unrolling SETCC into scalar comparions.
+ EVT SelectVT = N->getValueType(0);
+ EVT MaskVT = getSetCCResultType(SelectVT);
+ assert(MaskVT.isVector() && "Expected a vector type.");
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() != MaskVT) {
+ SDLoc MaskDL(N0);
+
+ // Extend the mask to the desired value type.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ SDValue Mask = DAG.getNode(ExtendCode, MaskDL, MaskVT, N0);
+
+ AddToWorkList(Mask.getNode());
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ return DAG.getNode(ISD::VSELECT, DL, SelectVT, Mask, LHS, RHS);
+ }
+
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 7b1d14d..f1b06fc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -492,14 +492,19 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
- assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
- "Condition legalized before result?");
- unsigned NumElements = Cond.getValueType().getVectorNumElements();
- EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
- CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+ if (Cond.getOpcode() == ISD::SETCC) {
+ assert(Cond.getValueType() == getSetCCResultType(N->getValueType(0)) &&
+ "Condition has not been prepared for split!");
+ GetSplitVector(Cond, CL, CH);
+ } else {
+ EVT ETy = Cond.getValueType().getVectorElementType();
+ unsigned NumElements = Cond.getValueType().getVectorNumElements();
+ EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), ETy, NumElements / 2);
+ CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+ }
}
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4fe0107..73da226 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -141,8 +141,11 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
- return MVT::i1;
+ virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i1;
+ else
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
}
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a8743af..6df0fd8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1547,7 +1547,16 @@ void X86TargetLowering::resetOperationActions() {
}
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i8;
+ if (!VT.isVector())
+ return MVT::i8;
+
+ const TargetMachine &TM = getTargetMachine();
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+ switch(VT.getVectorNumElements()) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+
return VT.changeVectorElementTypeToInteger();
}
diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll
new file mode 100644
index 0000000..f9e7c20
--- /dev/null
+++ b/test/CodeGen/X86/vec_split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
+; SSE4-LABEL: split16:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split16:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split16:
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
+; SSE4-LABEL: split32:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split32:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split32:
+; AVX2: vpminuw
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <32 x i16> %a, %b
+ %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %2
+}