diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-10-30 13:51:01 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-10-30 13:51:01 +0000 |
commit | a7c3cac87118c3e409a7fc889090c5ffe242985e (patch) | |
tree | 0bf0d2fea2fa6acf79bcbf5e6c6d21a89400f2cd | |
parent | aed9334acfdd8fa7548dc540fe865a5a641cb208 (diff) | |
download | external_llvm-a7c3cac87118c3e409a7fc889090c5ffe242985e.zip external_llvm-a7c3cac87118c3e409a7fc889090c5ffe242985e.tar.gz external_llvm-a7c3cac87118c3e409a7fc889090c5ffe242985e.tar.bz2 |
[mips][msa] Combine binsri-like DAG of AND and OR into equivalent VSELECT
(or (and $a, $mask), (and $b, $inverse_mask)) => (vselect $mask, $a, $b),
where $mask is a constant splat and $inverse_mask is its bitwise inverse.
This allows bitwise operations to make use of bsel.
It's also a stepping stone towards matching bins[lr] and bins[lr]i from
normal IR.
Two sets of similar tests have been added in this commit. The bsel_* functions
test the case where binsri cannot be used. The binsr_*_i functions will
start to use the binsri instruction in the next commit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193682 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/Mips/MipsSEISelLowering.cpp | 108 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/bitwise.ll | 164 |
2 files changed, 272 insertions, 0 deletions
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index fc536bb..d858e2a 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -93,6 +93,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::XOR); @@ -487,6 +488,110 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Determine if the specified node is a constant vector splat. +// +// Returns true and sets Imm if: +// * N is a ISD::BUILD_VECTOR representing a constant splat +// +// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The +// differences are that it assumes the MSA has already been checked and the +// arbitrary requirement for a maximum of 32-bit integers isn't applied (and +// must not be in order for binsri.d to be selectable). +static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { + BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); + + if (Node == NULL) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, !IsLittleEndian)) + return false; + + Imm = SplatValue; + + return true; +} + +// Perform combines where ISD::OR is the root node. +// +// Performs the following transformations: +// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) +// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit +// vector type. 
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (!Subtarget->hasMSA()) + return SDValue(); + + EVT Ty = N->getValueType(0); + + if (!Ty.is128BitVector()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + SDValue Op1Op0 = Op1->getOperand(0); + SDValue Op1Op1 = Op1->getOperand(1); + bool IsLittleEndian = !Subtarget->isLittle(); + + SDValue IfSet, IfClr, Cond; + APInt Mask, InvMask; + + // If Op0Op0 is an appropriate mask, try to find it's inverse in either + // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while + // looking. + // IfClr will be set if we find a valid match. + if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { + Cond = Op0Op0; + IfSet = Op0Op1; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op0; + } + + // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same + // thing again using this mask. + // IfClr will be set if we find a valid match. + if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { + Cond = Op0Op1; + IfSet = Op0Op0; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op0; + } + + // At this point, IfClr will be set if we have a valid match. + if (!IfClr.getNode()) + return SDValue(); + + assert(Cond.getNode() && IfSet.getNode()); + + // Fold degenerate cases. + if (Mask.isAllOnesValue()) + return IfSet; + else if (Mask == 0) + return IfClr; + + // Transform the DAG into an equivalent VSELECT. 
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet); + } + + return SDValue(); +} + static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -777,6 +882,9 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; + case ISD::OR: + Val = performORCombine(N, DAG, DCI, Subtarget); + break; case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll index f5da9f2..a606fdf 100644 --- a/test/CodeGen/Mips/msa/bitwise.ll +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -972,6 +972,170 @@ define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { ; CHECK: .size ctlz_v2i64 } +define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bsel_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6> + %4 = and <16 x i8> %2, <i8 249, i8 249, i8 249, i8 249, + i8 249, i8 249, i8 249, i8 249, + i8 249, i8 249, i8 249, i8 249, + i8 249, i8 249, i8 249, i8 249> + %5 = or <16 x i8> %3, %4 + ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v16i8 +} + +define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bsel_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6, + i16 6, i16 6, i16 6, i16 6> + %4 = and <8 x i16> %2, <i16 65529, i16 
65529, i16 65529, i16 65529, + i16 65529, i16 65529, i16 65529, i16 65529> + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v8i16 +} + +define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bsel_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6> + %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289> + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v4i32 +} + +define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bsel_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, <i64 6, i64 6> + %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609> + %5 = or <2 x i64> %3, %4 + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v2i64 +} + +define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: binsr_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, + i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252, + i8 252, i8 252, i8 252, i8 252, + i8 252, i8 
252, i8 252, i8 252, + i8 252, i8 252, i8 252, i8 252> + %5 = or <16 x i8> %3, %4 + ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size binsr_v16i8_i +} + +define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: binsr_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, + i16 3, i16 3, i16 3, i16 3> + %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532, + i16 65532, i16 65532, i16 65532, i16 65532> + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size binsr_v8i16_i +} + +define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: binsr_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> + %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292> + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size binsr_v4i32_i +} + +define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: binsr_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, <i64 3, i64 3> + %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612> + %5 = or <2 x i64> %3, %4 + ; 
CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size binsr_v2i64_i +} + declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val) declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) |