diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-21 15:03:54 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-21 15:03:54 +0000 |
commit | 4e2d2f091e88dd83d1685173d2c0692d8ae155e6 (patch) | |
tree | 9e93aac3ffb4e24e7d1657fff20f394c63cfb995 | |
parent | 9eaade8b56b124150d189e1786b14306cdaf5641 (diff) | |
download | external_llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.zip external_llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.gz external_llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.bz2 |
Merging r195355:
------------------------------------------------------------------------
r195355 | dsanders | 2013-11-21 13:24:49 +0000 (Thu, 21 Nov 2013) | 20 lines
Add support for legalizing SETNE/SETEQ by inverting the condition code and the result of the comparison.
Summary:
LegalizeSetCCCondCode can now legalize SETEQ and SETNE by returning the inverse
condition and requesting that the caller invert the result of the condition.
Callers of LegalizeSetCCCondCode must handle the inverted CC, and they do
so as follows:
  SETCC, BR_CC:
    Invert the result of the SETCC with SelectionDAG::getNOT()
  SELECT_CC:
    Swap the true/false operands.
This is necessary for MSA which lacks an integer SETNE instruction.
Reviewers: resistor
CC: llvm-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D2229
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195363 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 68 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/compare.ll | 85 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll | 149 |
3 files changed, 288 insertions, 14 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f50e5e7..9061ae9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -96,7 +96,7 @@ private: ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - SDLoc dl); + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, @@ -1597,18 +1597,30 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and /// condition code CC on the current target. +/// /// If the SETCC has been legalized using AND / OR, then the legalized node -/// will be stored in LHS. RHS and CC will be set to SDValue(). +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// /// If the SETCC has been legalized by using getSetCCSwappedOperands(), -/// then the values of LHS and RHS will be swapped and CC will be set to the -/// new condition. +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// /// \returns true if the SetCC has been legalized, false if it hasn't. 
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1661,11 +1673,21 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: - case ISD::SETNE: - case ISD::SETEQ: // We only support using the inverted operation, which is computed above // and not a different manner of supporting expanding these cases. llvm_unreachable("Don't know how to expand this condition!"); + case ISD::SETNE: + case ISD::SETEQ: + // Try inverting the result of the inverse condition. + InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; + } + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; @@ -2783,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -3673,15 +3696,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, dl); + Tmp3, NeedInvert, dl); if (Legalized) { - // If we exapanded the SETCC by swapping LHS and RHS, create a new SETCC - // node. + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. 
if (Tmp3.getNode()) Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3); + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); + Results.push_back(Tmp1); break; } @@ -3736,11 +3764,18 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (!Legalized) { Legalized = LegalizeSetCCCondCode( - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl); + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); - // If we exapanded the SETCC by swapping LHS and RHS, create a new - // SELECT_CC node. + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); + + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. if (CC.getNode()) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); @@ -3761,11 +3796,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(1); // CC bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); - // If we exapanded the SETCC by swapping LHS and RHS, create a new BR_CC + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. 
if (Tmp4.getNode()) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll index 36569a9..6408d7b 100644 --- a/test/CodeGen/Mips/msa/compare.ll +++ b/test/CodeGen/Mips/msa/compare.ll @@ -341,6 +341,91 @@ define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size clt_u_v2i64 } +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: cne_v16i8: + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; CHECK-DAG: xori.b [[R3]], [[R3]], 255 + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v16i8 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: cne_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v8i16 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. 
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: cne_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v4i32 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: cne_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v2i64 +} + define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { ; CHECK: ceqi_v16i8: diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll new file mode 100644 index 0000000..1a03e55 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll @@ -0,0 +1,149 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Don't know how to expand this condition!" 
unreachable. +; It should at least successfully build. + +define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <2 x i32> + %A3 = alloca <2 x double> + %A2 = alloca i64 + %A1 = alloca i64 + %A = alloca double + %L = load i8* %0 + store i8 -101, i8* %0 + %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1> + %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5 + %B = and i64 116376, 57247 + %FC = uitofp i8 7 to double + %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %L5 = load i8* %0 + store i8 %L, i8* %0 + %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3 + %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4 + %B9 = or <8 x i64> zeroinitializer, zeroinitializer + %Sl10 = select i1 false, i64 116376, i64 380809 + %Cmp = icmp sgt i32 394647, 17081 + br label %CF + +CF: ; preds = %CF, %BB + %L11 = load i8* %0 + store i8 -87, i8* %0 + %E12 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5> + %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1 + %B15 = srem i64 %Sl10, 380809 + %FC16 = sitofp i64 57247 to float + %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00 + %Cmp18 = icmp uge i8 %L, %5 + br i1 %Cmp18, label %CF, label %CF80 + +CF80: ; preds = %CF80, %CF88, %CF + %L19 = load i8* %0 + store i8 -101, i8* %0 + %E20 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 
x i32> <i32 7, i32 1, i32 3, i32 5> + %I22 = insertelement <4 x i64> zeroinitializer, i64 127438, i32 1 + %B23 = fdiv double %Sl17, 0.000000e+00 + %Sl24 = select i1 %Cmp18, i32 420510, i32 492085 + %Cmp25 = icmp ugt i1 %Cmp18, false + br i1 %Cmp25, label %CF80, label %CF83 + +CF83: ; preds = %CF83, %CF80 + %L26 = load i8* %0 + store i8 -87, i8* %0 + %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5> + %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1 + %B30 = lshr <8 x i8> %I8, %I8 + %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double> + %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Cmp33 = icmp eq i64 %B, 116376 + br i1 %Cmp33, label %CF83, label %CF88 + +CF88: ; preds = %CF83 + %L34 = load i8* %0 + store i8 -87, i8* %0 + %E35 = extractelement <8 x i64> %Shuff, i32 7 + %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0> + %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0 + %B38 = xor <8 x i64> %B9, %B9 + %ZE = zext i32 0 to i64 + %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5 + %Cmp40 = icmp sgt i1 %Cmp, false + br i1 %Cmp40, label %CF80, label %CF81 + +CF81: ; preds = %CF81, %CF85, %CF87, %CF88 + %L41 = load i8* %0 + store i8 %L34, i8* %0 + %E42 = extractelement <8 x i64> %Shuff13, i32 6 + %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7> + %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3 + %B45 = fsub float %FC16, 0x3AC86DCC40000000 + %Tr = trunc <4 x i64> %I14 to <4 x i32> + %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer + %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18 + br i1 %Cmp47, label %CF81, 
label %CF85 + +CF85: ; preds = %CF81 + %L48 = load i8* %0 + store i8 -101, i8* %0 + %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2 + %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3> + %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3 + %B52 = or i32 336955, %Sl24 + %FC53 = uitofp i8 %L48 to double + %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24 + %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer + %L56 = load i8* %0 + store i8 %L11, i8* %0 + %E57 = extractelement <4 x i64> %Shuff21, i32 1 + %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2> + %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2 + %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Tr61 = trunc i8 49 to i1 + br i1 %Tr61, label %CF81, label %CF84 + +CF84: ; preds = %CF84, %CF85 + %Sl62 = select i1 false, i8 %L, i8 %L48 + %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer + %L64 = load i8* %0 + store i8 %5, i8* %0 + %E65 = extractelement <8 x i1> %Cmp55, i32 0 + br i1 %E65, label %CF84, label %CF87 + +CF87: ; preds = %CF84 + %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1> + %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1 + %B68 = frem double %B23, %Sl17 + %ZE69 = zext <8 x i8> %Sl32 to <8 x i64> + %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12 + %Cmp71 = icmp slt <8 x i64> %I, %Shuff + %L72 = load i8* %0 + store i8 %L72, i8* %0 + %E73 = extractelement <8 x i1> %Cmp55, i32 6 + br i1 %E73, label %CF81, label %CF82 + +CF82: ; preds = %CF82, %CF87 + %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7> + %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3 + %B76 = fsub double 0.000000e+00, %FC53 + 
%Tr77 = trunc i32 %E to i8 + %Sl78 = select i1 %Cmp18, i64* %A2, i64* %2 + %Cmp79 = icmp eq i32 394647, 492085 + br i1 %Cmp79, label %CF82, label %CF86 + +CF86: ; preds = %CF82 + store i64 %Sl70, i64* %Sl78 + store i64 %E57, i64* %Sl78 + store i64 %Sl70, i64* %Sl78 + store i64 %B, i64* %Sl78 + store i64 %Sl10, i64* %Sl78 + ret void +} |