aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2012-12-05 09:24:57 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2012-12-05 09:24:57 +0000
commit226e0e6264dc15ea8f26261a813eae3c17987b3b (patch)
treef481003cfe75f95725d8c7787015ba30edfaca4b /lib
parenteca1fcf3d2d8246c45648fea59bd21a4091f9115 (diff)
downloadexternal_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.zip
external_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.tar.gz
external_llvm-226e0e6264dc15ea8f26261a813eae3c17987b3b.tar.bz2
Simplified BLEND pattern matching for shuffles.
Generate VPBLENDD for AVX2 and VPBLENDW for v16i16 type on AVX2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169366 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp89
-rw-r--r--lib/Target/X86/X86ISelLowering.h8
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td4
-rw-r--r--lib/Target/X86/X86InstrSSE.td45
4 files changed, 68 insertions, 78 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 81e8a7b..b3ff4ee 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5641,64 +5641,53 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ EVT VT = SVOp->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasSSE41())
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
- unsigned ISDNo = 0;
- MVT OpTy;
-
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v8i16:
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v8i16;
- break;
- case MVT::v4i32:
- case MVT::v4f32:
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v4f32;
- break;
- case MVT::v2i64:
- case MVT::v2f64:
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v2f64;
- break;
- case MVT::v8i32:
- case MVT::v8f32:
- if (!Subtarget->hasFp256())
- return SDValue();
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v8f32;
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (!Subtarget->hasFp256())
- return SDValue();
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v4f64;
- break;
- }
- assert(ISDNo && "Invalid Op Number");
+ // Check the mask for BLEND and build the value.
+ unsigned MaskValue = 0;
+ // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
- unsigned MaskVals = 0;
+ // Blend for v16i16 should be symetric for the both lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
- for (unsigned i = 0; i != NumElems; ++i) {
+ int SndLaneEltIdx = (NumLanes == 2) ?
+ SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if (EltIdx == (int)i || EltIdx < 0)
- MaskVals |= (1<<i);
- else if (EltIdx == (int)(i + NumElems))
- continue; // Bit is set to zero;
- else
+
+ if ((EltIdx == -1 || EltIdx == (int)i) &&
+ (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ continue;
+
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx == -1 ||
+ (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+ MaskValue |= (1<<i);
+ else
return SDValue();
}
- V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
- SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
- DAG.getConstant(MaskVals, MVT::i32));
+ // Convert i32 vectors to floating point if it is not AVX2.
+ // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
+ EVT BlendVT = VT;
+ if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+ BlendVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
@@ -11972,9 +11961,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
- case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
- case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
- case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2988cee..e830c5f 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -176,13 +176,11 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLENDV - Blend where the selector is an XMM.
+ /// BLENDV - Blend where the selector is a register.
BLENDV,
- /// BLENDxx - Blend where the selector is an immediate.
- BLENDPW,
- BLENDPS,
- BLENDPD,
+ /// BLENDI - Blend where the selector is an immediate.
+ BLENDI,
/// HADD - Integer horizontal add.
HADD,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 73ba001..09ab995 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -187,9 +187,7 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 229e8b2..da06bf5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6891,31 +6891,31 @@ let Predicates = [HasAVX] in {
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
(imm:$mask))),
- (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
(imm:$mask))),
- (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
- (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6979,15 +6979,15 @@ let Predicates = [UseSSE41] in {
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
@@ -7873,6 +7873,13 @@ defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, memopv4i64, i256mem>, VEX_L;
}
+def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+ imm:$mask)),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
+ imm:$mask)),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand