diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2012-04-10 14:33:13 +0000 |
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-04-10 14:33:13 +0000 |
| commit | 50e64cfe6e250dbe2528fc5bda75c68b04a8bc49 (patch) | |
| tree | 11f8711c3c0c8a1418507063bde7ac8b81a7826f /lib/Target/X86/X86ISelLowering.cpp | |
| parent | 45fb79bc54159330979bf24e4bfbdbb64bee1e2c (diff) | |
| download | external_llvm-50e64cfe6e250dbe2528fc5bda75c68b04a8bc49.zip external_llvm-50e64cfe6e250dbe2528fc5bda75c68b04a8bc49.tar.gz external_llvm-50e64cfe6e250dbe2528fc5bda75c68b04a8bc49.tar.bz2 | |
Modify the code that lowers shuffles to blends so that it uses vblendXX instead of blendvXX.
blendvXX uses a register for the selection, while vblendXX uses an immediate.
On Sandy Bridge, the two still have the same latency and execute on the same execution ports.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154396 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 88 |
1 file changed, 54 insertions, 34 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4f14a0e..9af194c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5391,59 +5391,76 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - LLVMContext *Context = DAG.getContext(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); EVT EltVT = VT.getVectorElementType(); - unsigned EltSize = EltVT.getSizeInBits(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); - // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. - if (!Subtarget->hasAVX()) + if (!Subtarget->hasSSE41()) return SDValue(); if (MaskSize != InSize) return SDValue(); - SmallVector<Constant*,2> MaskVals; - ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); - ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid Op Number"); + + unsigned MaskVals = 0; for (int i = 0; i < MaskSize; ++i) { int EltIdx = SVOp->getMaskElt(i); if (EltIdx == i || EltIdx == -1) - 
MaskVals.push_back(NegOne); + MaskVals |= (1<<i); else if (EltIdx == (i + MaskSize)) - MaskVals.push_back(Zero); + continue; // Bit is set to zero; else return SDValue(); } - Constant *MaskC = ConstantVector::get(MaskVals); - EVT MaskTy = EVT::getEVT(MaskC->getType()); - assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); - SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); - unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment(); - SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - - if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - - if (Subtarget->hasAVX()) { - switch (MaskTy.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v16i8: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v8i32: - case MVT::v4i64: - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - } - } - - return SDValue(); + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -11050,6 +11067,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; |
