diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-25 15:53:39 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-25 15:53:39 +0000 |
commit | 975e958dbc12ba412385b721100fc1c830b0e3ab (patch) | |
tree | f4970cf0f88acd5b21f574a0f11810c867a487e8 | |
parent | f6312d4b2d7effce08db0ee07e3946bc3c7cb9aa (diff) | |
download | external_llvm-975e958dbc12ba412385b721100fc1c830b0e3ab.zip external_llvm-975e958dbc12ba412385b721100fc1c830b0e3ab.tar.gz external_llvm-975e958dbc12ba412385b721100fc1c830b0e3ab.tar.bz2 |
Merging r195635:
------------------------------------------------------------------------
r195635 | dsanders | 2013-11-25 11:14:43 +0000 (Mon, 25 Nov 2013) | 19 lines
Fixed tryFoldToZero() for vector types that need expansion.
Summary:
Moved the requirement for SelectionDAG::getConstant() to return legally
typed nodes slightly earlier. There were two optional DAGCombine passes
that were missed out and were required to produce type-legal DAGs.
Simplified a code-path in tryFoldToZero() to use SelectionDAG::getConstant().
This provides support for both promoted and expanded vector types whereas the
previous code only supported promoted vector types.
Fixes a "Type for zero vector elements is not legal" assertion detected by
an llvm-stress generated test.
Reviewers: resistor
CC: llvm-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D2251
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195651 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll | 143 |
3 files changed, 147 insertions, 15 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e0732fa..805cc9e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1635,19 +1635,8 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, VT); - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - EVT ElemTy = VT.getVectorElementType(); - if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) == - TargetLowering::TypePromoteInteger) - ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy); - assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) && - "Type for zero vector elements is not legal"); - SDValue El = DAG.getConstant(0, ElemTy); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return DAG.getConstant(0, VT); return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 50d6402..3a0cfa1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -670,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -711,8 +713,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - CurDAG->NewNodesMustHaveLegalTypes = true; - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); { diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll new file mode 100644 index 0000000..96547d9 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll @@ -0,0 +1,143 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Type for zero vector elements is not legal" assertion. +; It should at least successfully build. + +define void @autogen_SD3926023935(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca i1 + %A3 = alloca float + %A2 = alloca double + %A1 = alloca float + %A = alloca double + %L = load i8* %0 + store i8 -123, i8* %0 + %E = extractelement <4 x i64> zeroinitializer, i32 1 + %Shuff = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 0 + %BC = bitcast i64 181325 to double + %Sl = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer + %Cmp = icmp ne <4 x i64> zeroinitializer, zeroinitializer + %L5 = load i8* %0 + store i8 %L, i8* %0 + %E6 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 2, i32 0> + %I8 = insertelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i64 498254, i32 4 + %B = shl i32 0, 364464 + %Sl9 = select i1 false, i64 %E, i64 498254 + %Cmp10 = icmp sge i8 -123, %5 + br label %CF80 + +CF80: ; preds = %BB + %L11 = load i8* %0 + store i8 -123, i8* %0 + %E12 = extractelement <2 x i16> zeroinitializer, i32 1 + %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I14 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %B, i32 2 + %B15 = sdiv i64 334618, -1 + %PC = bitcast i1* %A4 to i64* + %Sl16 = select i1 %Cmp10, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> + %Cmp17 = icmp ule <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %Sl16 + %L18 = load double* %A2 + store i64 498254, i64* %PC + %E19 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1> + %I21 = insertelement <2 x i1> zeroinitializer, i1 false, i32 1 + %B22 = fadd double 0.000000e+00, %BC + %ZE = zext <2 x i1> %Shuff20 to <2 x i32> + %Sl23 = select i1 %Cmp10, <2 x i1> %Shuff20, <2 x i1> zeroinitializer + %Cmp24 = icmp ult <2 x i32> zeroinitializer, zeroinitializer + %L25 = load i8* %0 + store i8 %L25, i8* %0 + %E26 = extractelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, i32 3 + %Shuff27 = shufflevector <4 x i32> %Shuff, <4 x i32> %I14, <4 x i32> <i32 6, i32 0, i32 undef, i32 4> + %I28 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 0 + %B29 = lshr i8 %E26, -43 + %Tr = trunc i8 %L5 to i1 + br label %CF79 + +CF79: ; preds = %CF80 + %Sl30 = select i1 false, i8 %B29, i8 -123 + %Cmp31 = icmp sge <2 x i1> %I, %I + %L32 = load i64* %PC + store i8 -123, i8* %0 + %E33 = extractelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 2 + %Shuff34 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 5, i32 7, i32 1, i32 3> + %I35 = insertelement <4 x i64> zeroinitializer, i64 498254, i32 3 + %B36 = sub <8 x i64> %I8, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + %PC37 = bitcast i8* %0 to i1* + %Sl38 = select i1 %Cmp10, i8 -43, i8 %L5 + %Cmp39 = icmp eq i64 498254, %B15 + br label %CF + +CF: ; preds = %CF, %CF79 + %L40 = load double* %A + store i1 %Cmp39, i1* %PC37 + %E41 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff42 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %ZE, <2 x i32> <i32 2, i32 undef> + %I43 = insertelement <4 x i32> %Shuff, i32 %3, i32 0 + %B44 = shl i64 %E41, -1 + %Se = sext <2 x i1> %I to <2 x i32> + %Sl45 = select i1 %Cmp10, i1 false, i1 false + br i1 %Sl45, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF + %Cmp46 = fcmp uno double 0.000000e+00, 0.000000e+00 + br i1 %Cmp46, label %CF77, label %CF78 + +CF78: ; preds = %CF78, %CF83, %CF82, %CF77 + %L47 = load i64* %PC + store i8 -123, i8* %0 + %E48 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff49 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 undef> + %I50 = insertelement <2 x i1> zeroinitializer, i1 %Cmp10, i32 0 + %B51 = sdiv i64 %E19, 463132 + %Tr52 = trunc i64 %E48 to i32 + %Sl53 = select i1 %Tr, i1 %Cmp46, i1 %Cmp10 + br i1 %Sl53, label %CF78, label %CF83 + +CF83: ; preds = %CF78 + %Cmp54 = fcmp uge double %L40, %L40 + br i1 %Cmp54, label %CF78, label %CF82 + +CF82: ; preds = %CF83 + %L55 = load i64* %PC + store i64 %L32, i64* %PC + %E56 = extractelement <2 x i16> %Shuff7, i32 1 + %Shuff57 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I58 = insertelement <2 x i32> %Sl, i32 %Tr52, i32 0 + %B59 = or i32 %B, %3 + %FC = sitofp i64 498254 to double + %Sl60 = select i1 false, i64 %E6, i64 -1 + %Cmp61 = icmp sgt <4 x i32> %Shuff27, %I43 + %L62 = load i64* %PC + store i64 %Sl9, i64* %PC + %E63 = extractelement <2 x i32> %ZE, i32 0 + %Shuff64 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 1, i32 3, i32 undef, i32 7> + %I65 = insertelement <4 x i32> %Shuff, i32 %3, i32 3 + %B66 = sub i64 %L47, 53612 + %Tr67 = trunc i64 %4 to i32 + %Sl68 = select i1 %Cmp39, i1 %Cmp39, i1 false + br i1 %Sl68, label %CF78, label %CF81 + +CF81: ; preds = %CF82 + %Cmp69 = icmp ne <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36 + %L70 = load i8* %0 + store i64 %L55, i64* %PC + %E71 = extractelement <4 x i32> %Shuff49, i32 1 + %Shuff72 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff34, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I73 = insertelement <4 x i64> %Shuff64, i64 %E, i32 2 + %B74 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36 + %Sl75 = select i1 %Sl68, i64 %B51, i64 %L55 + %Cmp76 = icmp sgt <8 x i64> %B74, %B36 + store i1 %Cmp39, i1* %PC37 + store i64 %E41, i64* %PC + store i64 %L32, i64* %PC + store i64 %Sl75, i64* %2 + store i64 %L32, i64* %PC + ret void +} |