aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--test/CodeGen/X86/v4i32load-crash.ll27
2 files changed, 37 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d0667ff..76eeb64 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5395,7 +5395,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- SDLoc &DL, SelectionDAG &DAG) {
+ SDLoc &DL, SelectionDAG &DAG,
+ bool isAfterLegalize) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
@@ -5431,7 +5432,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
+
+ if (isAfterLegalize &&
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
SDValue NewLd = SDValue();
+
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
@@ -6075,7 +6082,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
if (LD.getNode())
return LD;
@@ -16263,7 +16270,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
}
/// PerformTruncateCombine - Converts truncate operation to
diff --git a/test/CodeGen/X86/v4i32load-crash.ll b/test/CodeGen/X86/v4i32load-crash.ll
new file mode 100644
index 0000000..052c4c3
--- /dev/null
+++ b/test/CodeGen/X86/v4i32load-crash.ll
@@ -0,0 +1,27 @@
+; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s
+
+;PR18045:
+;Issue of selection for 'v4i32 load'.
+;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
+;This node was generated by X86ISelLowering.cpp, EltsFromConsecutiveLoads
+;static function after legilize stage.
+
+@e = external global [4 x i32], align 4
+@f = external global [4 x i32], align 4
+
+; Function Attrs: nounwind
+define void @fn3(i32 %el) {
+entry:
+ %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
+ %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
+ %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
+ %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
+ %4 = insertelement <4 x i32> undef, i32 %0, i32 0
+ %5 = insertelement <4 x i32> %4, i32 %1, i32 1
+ %6 = insertelement <4 x i32> %5, i32 %2, i32 2
+ %7 = insertelement <4 x i32> %6, i32 %3, i32 3
+ %8 = add <4 x i32> %6, %7
+ store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*)
+ ret void
+}
+