diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-12-05 09:24:57 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-12-05 09:24:57 +0000 |
commit | 226e0e6264dc15ea8f26261a813eae3c17987b3b (patch) | |
tree | f481003cfe75f95725d8c7787015ba30edfaca4b /lib | |
parent | eca1fcf3d2d8246c45648fea59bd21a4091f9115 (diff) | |
download | external_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.zip external_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.tar.gz external_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.tar.bz2 |
Simplified BLEND pattern matching for shuffles.
Generate VPBLENDD for AVX2 and VPBLENDW for v16i16 type on AVX2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169366 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 89 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 45 |
4 files changed, 68 insertions, 78 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 81e8a7b..b3ff4ee 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5641,64 +5641,53 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - MVT VT = SVOp->getValueType(0).getSimpleVT(); + EVT VT = SVOp->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - if (!Subtarget->hasSSE41()) + if (!Subtarget->hasSSE41() || EltVT == MVT::i8) + return SDValue(); + if (!Subtarget->hasInt256() && VT == MVT::v16i16) return SDValue(); - unsigned ISDNo = 0; - MVT OpTy; - - switch (VT.SimpleTy) { - default: return SDValue(); - case MVT::v8i16: - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v8i16; - break; - case MVT::v4i32: - case MVT::v4f32: - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v4f32; - break; - case MVT::v2i64: - case MVT::v2f64: - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v2f64; - break; - case MVT::v8i32: - case MVT::v8f32: - if (!Subtarget->hasFp256()) - return SDValue(); - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v8f32; - break; - case MVT::v4i64: - case MVT::v4f64: - if (!Subtarget->hasFp256()) - return SDValue(); - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v4f64; - break; - } - assert(ISDNo && "Invalid Op Number"); + // Check the mask for BLEND and build the value. + unsigned MaskValue = 0; + // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. + unsigned NumLanes = (NumElems-1)/8 + 1; + unsigned NumElemsInLane = NumElems / NumLanes; - unsigned MaskVals = 0; + // Blend for v16i16 should be symmetric for both lanes. + for (unsigned i = 0; i < NumElemsInLane; ++i) { - for (unsigned i = 0; i != NumElems; ++i) { + int SndLaneEltIdx = (NumLanes == 2) ? 
+ SVOp->getMaskElt(i + NumElemsInLane) : -1; int EltIdx = SVOp->getMaskElt(i); - if (EltIdx == (int)i || EltIdx < 0) - MaskVals |= (1<<i); - else if (EltIdx == (int)(i + NumElems)) - continue; // Bit is set to zero; - else + + if ((EltIdx == -1 || EltIdx == (int)i) && + (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane))) + continue; + + if (((unsigned)EltIdx == (i + NumElems)) && + (SndLaneEltIdx == -1 || + (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane)) + MaskValue |= (1<<i); + else return SDValue(); } - V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); - V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); - SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, - DAG.getConstant(MaskVals, MVT::i32)); + // Convert i32 vectors to floating point if it is not AVX2. + // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. + EVT BlendVT = VT; + if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) { + BlendVT = EVT::getVectorVT(*DAG.getContext(), + EVT::getFloatingPointVT(EltVT.getSizeInBits()), + NumElems); + V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2); + } + + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, + DAG.getConstant(MaskValue, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } @@ -11972,9 +11961,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; - case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; - case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; - case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; + case X86ISD::BLENDI: return "X86ISD::BLENDI"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2988cee..e830c5f 100644 --- 
a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -176,13 +176,11 @@ namespace llvm { /// PSIGN - Copy integer sign. PSIGN, - /// BLENDV - Blend where the selector is an XMM. + /// BLENDV - Blend where the selector is a register. BLENDV, - /// BLENDxx - Blend where the selector is an immediate. - BLENDPW, - BLENDPS, - BLENDPD, + /// BLENDI - Blend where the selector is an immediate. + BLENDI, /// HADD - Integer horizontal add. HADD, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 73ba001..09ab995 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -187,9 +187,7 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; +def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 229e8b2..da06bf5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6891,31 +6891,31 @@ let Predicates = [HasAVX] in { (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), + def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2), (imm:$mask))), - (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; - def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), + (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>; + def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2), (imm:$mask))), - (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; + 
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>; - def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), + def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2), (imm:$mask))), - (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>; + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2), (imm:$mask))), - (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>; + def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2), (imm:$mask))), - (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; + (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; } let Predicates = [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), - (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), + (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>; + def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2), (imm:$mask))), - (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; + (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6979,15 +6979,15 @@ let Predicates = [UseSSE41] in { (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; - def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), + def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2), (imm:$mask))), - (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>; + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2), (imm:$mask))), - (BLENDPSrri VR128:$src2, 
VR128:$src1, imm:$mask)>; - def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>; + def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2), (imm:$mask))), - (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; + (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; } @@ -7873,6 +7873,13 @@ defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, VR256, memopv4i64, i256mem>, VEX_L; } +def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), + imm:$mask)), + (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>; +def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), + imm:$mask)), + (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>; + //===----------------------------------------------------------------------===// // VPBROADCAST - Load from memory and broadcast to all elements of the // destination operand |