diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2011-09-08 08:11:19 +0000 | 
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2011-09-08 08:11:19 +0000 | 
| commit | ffe3e7da849a10cdbe8ee4e5b5e243fc48ca0ffd (patch) | |
| tree | 6eaaab69d092573c574d305e528d81f23b9391df | |
| parent | 6d483c2b0712d0111b02d23cda7c65c0f43cf1f1 (diff) | |
| download | external_llvm-ffe3e7da849a10cdbe8ee4e5b5e243fc48ca0ffd.zip external_llvm-ffe3e7da849a10cdbe8ee4e5b5e243fc48ca0ffd.tar.gz external_llvm-ffe3e7da849a10cdbe8ee4e5b5e243fc48ca0ffd.tar.bz2 | |
Add X86-SSE4 codegen support for vector-select.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139285 91177308-0d34-0410-b5e6-96231b3b80d8
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 5 | ||||
| -rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 8 | ||||
| -rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 8 | ||||
| -rw-r--r-- | test/CodeGen/Generic/promote-integers.ll | 2 | 
5 files changed, 52 insertions, 5 deletions
| diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1ed35d8..d74a872 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -917,6 +917,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)      setOperationAction(ISD::SHL,                MVT::v4i32, Custom);      setOperationAction(ISD::SHL,                MVT::v16i8, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v16i8, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v8i16, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v4i32, Custom); +    setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom); +      // i8 and i16 vectors are custom , because the source register and source      // source memory operand types are not the same width.  f32 vectors are      // custom since the immediate controlling the insert encodes additional @@ -8684,6 +8691,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {    return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));  } +SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { +  SDValue Cond  = Op.getOperand(0); +  SDValue Op1 = Op.getOperand(1); +  SDValue Op2 = Op.getOperand(2); +  DebugLoc DL = Op.getDebugLoc(); + +  SDValue Ops[] = {Cond, Op1, Op2}; + +  assert(Op1.getValueType().isVector() && "Op1 must be a vector"); +  assert(Op2.getValueType().isVector() && "Op2 must be a vector"); +  assert(Cond.getValueType().isVector() && "Cond must be a vector"); +  assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch"); +   +  switch (Op1.getValueType().getSimpleVT().SimpleTy) { +    default: break; +    case MVT::v2i64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); +    case MVT::v2f64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); +    case MVT::v4i32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); +    case MVT::v4f32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); +    case MVT::v16i8: return DAG.getNode(X86ISD::PBLENDVB, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); +  } +   +  return SDValue(); +} + +  // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or  // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart  // from the AND / OR. @@ -10350,6 +10383,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);    case ISD::SETCC:              return LowerSETCC(Op, DAG);    case ISD::SELECT:             return LowerSELECT(Op, DAG); +  case ISD::VSELECT:            return LowerVSELECT(Op, DAG);    case ISD::BRCOND:             return LowerBRCOND(Op, DAG);    case ISD::JumpTable:          return LowerJumpTable(Op, DAG);    case ISD::VASTART:            return LowerVASTART(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6ff9ca5..3051e16 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,8 +175,10 @@ namespace llvm {        /// PSIGNB/W/D - Copy integer sign.        PSIGNB, PSIGNW, PSIGND, -      /// PBLENDVB - Variable blend +      /// BLENDVXX family of opcodes        PBLENDVB, +      BLENDVPD, +      BLENDVPS,        /// FMAX, FMIN - Floating point max and min.        /// @@ -809,6 +811,7 @@ namespace llvm {      SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; +    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e80038e..7ad9c87 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -58,9 +58,15 @@ def X86psignw  : SDNode<"X86ISD::PSIGNW",  def X86psignd  : SDNode<"X86ISD::PSIGND",                   SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,                                        SDTCisSameAs<0,2>]>>; -def X86pblendv : SDNode<"X86ISD::PBLENDVB", +def X86pblendvb : SDNode<"X86ISD::PBLENDVB",                   SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,                                        SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; +def X86blendvpd : SDNode<"X86ISD::BLENDVPD",  +                  SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, +                                       SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; +def X86blendvps : SDNode<"X86ISD::BLENDVPS",  +                 SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, +                                      SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;  def X86pextrb  : SDNode<"X86ISD::PEXTRB",                   SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;  def X86pextrw  : SDNode<"X86ISD::PEXTRW", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8eab5d6..c210a98 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5843,7 +5843,7 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,  defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,                                           memopv32i8, int_x86_avx_blendv_ps_256>; -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, VR128:$src3), +def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),            (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,            Requires<[HasAVX]>; @@ -5871,8 +5871,12 @@ defm BLENDVPD     : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;  defm BLENDVPS     : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;  defm PBLENDVB     : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), +def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),            (PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>; +def : Pat<(X86blendvpd  XMM0, VR128:$src1, VR128:$src2), +          (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>; +def : Pat<(X86blendvps  XMM0, VR128:$src1, VR128:$src2), +          (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;  let Predicates = [HasAVX] in  def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), diff --git a/test/CodeGen/Generic/promote-integers.ll b/test/CodeGen/Generic/promote-integers.ll index 5812592..d965abf 100644 --- a/test/CodeGen/Generic/promote-integers.ll +++ b/test/CodeGen/Generic/promote-integers.ll @@ -3,8 +3,8 @@  ; This test is the poster-child for integer-element-promotion.  ; Until this feature is complete, we mark this test as expected to fail. -; XFAIL: *  ; CHECK: vector_code +; CHECK: blend  ; CHECK: ret  define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 )  {     %C = icmp eq <4 x i64> %A, %B | 
