diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
-rw-r--r-- | test/CodeGen/X86/v4i32load-crash.ll | 27 |
2 files changed, 37 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d0667ff..76eeb64 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5395,7 +5395,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { /// rather than undef via VZEXT_LOAD, but we do not detect that case today. /// There's even a handy isZeroNode for that purpose. static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, - SDLoc &DL, SelectionDAG &DAG) { + SDLoc &DL, SelectionDAG &DAG, + bool isAfterLegalize) { EVT EltVT = VT.getVectorElementType(); unsigned NumElems = Elts.size(); @@ -5431,7 +5432,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, // load of the entire vector width starting at the base pointer. If we found // consecutive loads for the low half, generate a vzext_load node. if (LastLoadedElt == NumElems - 1) { + + if (isAfterLegalize && + !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT)) + return SDValue(); + SDValue NewLd = SDValue(); + if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), @@ -6075,7 +6082,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { V[i] = Op.getOperand(i); // Check for elements which are consecutive loads. 
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG); + SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false); if (LD.getNode()) return LD; @@ -16263,7 +16270,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); - return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); + return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true); } /// PerformTruncateCombine - Converts truncate operation to diff --git a/test/CodeGen/X86/v4i32load-crash.ll b/test/CodeGen/X86/v4i32load-crash.ll new file mode 100644 index 0000000..052c4c3 --- /dev/null +++ b/test/CodeGen/X86/v4i32load-crash.ll @@ -0,0 +1,27 @@ +; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s + +;PR18045: +;Issue of selection for 'v4i32 load'. +;This instruction is not legal for X86 CPUs with sse < 'sse4.1'. +;This node was generated by X86ISelLowering.cpp, EltsFromConsecutiveLoads +;static function after the legalize stage. + +@e = external global [4 x i32], align 4 +@f = external global [4 x i32], align 4 + +; Function Attrs: nounwind +define void @fn3(i32 %el) { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0) + %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1) + %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2) + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3) + %4 = insertelement <4 x i32> undef, i32 %0, i32 0 + %5 = insertelement <4 x i32> %4, i32 %1, i32 1 + %6 = insertelement <4 x i32> %5, i32 %2, i32 2 + %7 = insertelement <4 x i32> %6, i32 %3, i32 3 + %8 = add <4 x i32> %6, %7 + store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*) + ret void +} + |