about summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2012-01-23 06:16:53 +0000
committer Craig Topper <craig.topper@gmail.com> 2012-01-23 06:16:53 +0000
commit 80e46360e9224a9169b1d2d1cd376e9c36d0d566 (patch)
tree da8b3da11e399bc7e3f79bd463d8455c9a5c31c3 /lib
parent 4254df306268edb7f47a77eecd1f8b59f1b17fe0 (diff)
download external_llvm-80e46360e9224a9169b1d2d1cd376e9c36d0d566.zip
external_llvm-80e46360e9224a9169b1d2d1cd376e9c36d0d566.tar.gz
external_llvm-80e46360e9224a9169b1d2d1cd376e9c36d0d566.tar.bz2
Custom lower vector shift intrinsics to target specific nodes and remove the patterns that are no longer needed.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148684 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 235
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 306
2 files changed, 156 insertions, 385 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4d67834..eccb546 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -64,17 +64,6 @@ static cl::opt<bool> UseRegMask("x86-use-regmask",
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue Insert128BitVector(SDValue Result,
- SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
-static SDValue Extract128BitVector(SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
@@ -9157,6 +9146,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue SrcOp, SDValue ShAmt,
+ SelectionDAG &DAG) {
+ assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+ if (isa<ConstantSDNode>(ShAmt)) {
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+ }
+ }
+
+ // Change opcode to non-immediate version
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+ case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+ }
+
+ // Need to build a vector containing shift amount
+ // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
+
SDValue
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -9359,24 +9385,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
+ // SSE/AVX shift intrinsics
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -9390,103 +9445,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return SDValue();
unsigned NewIntNo = 0;
- EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
- case Intrinsic::x86_sse2_pslli_w:
- NewIntNo = Intrinsic::x86_sse2_psll_w;
- break;
- case Intrinsic::x86_sse2_pslli_d:
- NewIntNo = Intrinsic::x86_sse2_psll_d;
- break;
- case Intrinsic::x86_sse2_pslli_q:
- NewIntNo = Intrinsic::x86_sse2_psll_q;
- break;
- case Intrinsic::x86_sse2_psrli_w:
- NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
- case Intrinsic::x86_sse2_psrli_d:
- NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
break;
- case Intrinsic::x86_sse2_psrli_q:
- NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
break;
- case Intrinsic::x86_sse2_psrai_w:
- NewIntNo = Intrinsic::x86_sse2_psra_w;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
break;
- case Intrinsic::x86_sse2_psrai_d:
- NewIntNo = Intrinsic::x86_sse2_psra_d;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
break;
- case Intrinsic::x86_avx2_pslli_w:
- NewIntNo = Intrinsic::x86_avx2_psll_w;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
break;
- case Intrinsic::x86_avx2_pslli_d:
- NewIntNo = Intrinsic::x86_avx2_psll_d;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
break;
- case Intrinsic::x86_avx2_pslli_q:
- NewIntNo = Intrinsic::x86_avx2_psll_q;
- break;
- case Intrinsic::x86_avx2_psrli_w:
- NewIntNo = Intrinsic::x86_avx2_psrl_w;
- break;
- case Intrinsic::x86_avx2_psrli_d:
- NewIntNo = Intrinsic::x86_avx2_psrl_d;
- break;
- case Intrinsic::x86_avx2_psrli_q:
- NewIntNo = Intrinsic::x86_avx2_psrl_q;
- break;
- case Intrinsic::x86_avx2_psrai_w:
- NewIntNo = Intrinsic::x86_avx2_psra_w;
- break;
- case Intrinsic::x86_avx2_psrai_d:
- NewIntNo = Intrinsic::x86_avx2_psra_d;
- break;
- default: {
- ShAmtVT = MVT::v2i32;
- switch (IntNo) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- }
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars uses 32b shift amounts but
// the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
// to be zero.
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- if (ShAmtVT == MVT::v4i32) {
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
- } else {
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
+ DAG.getConstant(0, MVT::i32));
// FIXME this must be lowered to get rid of the invalid type.
- }
EVT VT = Op.getValueType();
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
@@ -10006,43 +9998,6 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
-// may or may not be a constant. Takes immediate version of shift as input.
-static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue SrcOp, SDValue ShAmt,
- SelectionDAG &DAG) {
- assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
-
- if (isa<ConstantSDNode>(ShAmt)) {
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI:
- case X86ISD::VSRLI:
- case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
- }
- }
-
- // Change opcode to non-immediate version
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
- case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
- case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
- }
-
- // Need to build a vector containing shift amount
- // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
- ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
- return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
-}
-
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index da0fccc..ff10627 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3511,8 +3511,9 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
}
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, Intrinsic IntId,
- Intrinsic IntId2, RegisterClass RC,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, RegisterClass RC,
+ ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
bit Is2Addr = 1> {
// src2 is always 128-bit
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
@@ -3520,19 +3521,20 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>;
+ [(set RC:$dst, (OpNode (DstVT RC:$src1), (SrcVT VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, (bitconvert (memopv2i64 addr:$src2))))]>;
+ [(set RC:$dst, (OpNode (DstVT RC:$src1),
+ (bc_frag (memopv2i64 addr:$src2))))]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>;
+ [(set RC:$dst, (OpNode2 (DstVT RC:$src1), (i32 imm:$src2)))]>;
}
} // ExeDomain = SSEPackedInt
@@ -3728,32 +3730,24 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
-defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
- VR128, 0>, VEX_4V;
-defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
- VR128, 0>, VEX_4V;
-defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
- VR128, 0>, VEX_4V;
-
-defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
- VR128, 0>, VEX_4V;
-defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
- VR128, 0>, VEX_4V;
-defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
- VR128, 0>, VEX_4V;
-
-defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
- VR128, 0>, VEX_4V;
-defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
- VR128, 0>, VEX_4V;
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
// 128-bit logical shifts.
@@ -3774,32 +3768,24 @@ let ExeDomain = SSEPackedInt in {
} // Predicates = [HasAVX]
let Predicates = [HasAVX2] in {
-defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
- int_x86_avx2_psll_w, int_x86_avx2_pslli_w,
- VR256, 0>, VEX_4V;
-defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
- int_x86_avx2_psll_d, int_x86_avx2_pslli_d,
- VR256, 0>, VEX_4V;
-defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
- int_x86_avx2_psll_q, int_x86_avx2_pslli_q,
- VR256, 0>, VEX_4V;
-
-defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
- int_x86_avx2_psrl_w, int_x86_avx2_psrli_w,
- VR256, 0>, VEX_4V;
-defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
- int_x86_avx2_psrl_d, int_x86_avx2_psrli_d,
- VR256, 0>, VEX_4V;
-defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
- int_x86_avx2_psrl_q, int_x86_avx2_psrli_q,
- VR256, 0>, VEX_4V;
-
-defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
- int_x86_avx2_psra_w, int_x86_avx2_psrai_w,
- VR256, 0>, VEX_4V;
-defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
- int_x86_avx2_psra_d, int_x86_avx2_psrai_d,
- VR256, 0>, VEX_4V;
+defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
+defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
+
+defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
+defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
+
+defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
+defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
// 256-bit logical shifts.
@@ -3820,32 +3806,24 @@ let ExeDomain = SSEPackedInt in {
} // Predicates = [HasAVX2]
let Constraints = "$src1 = $dst" in {
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
- VR128>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
- VR128>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
- VR128>;
-
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
- VR128>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
- VR128>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
- VR128>;
-
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
- VR128>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
- VR128>;
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32>;
let ExeDomain = SSEPackedInt in {
// 128-bit logical shifts.
@@ -3876,60 +3854,6 @@ let Predicates = [HasAVX] in {
(VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
-
- def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (VPSLLWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (VPSLLDri VR128:$src1, imm:$src2)>;
- def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (VPSLLQri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (VPSRLWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (VPSRLDri VR128:$src1, imm:$src2)>;
- def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (VPSRLQri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
- (VPSRAWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
- (VPSRADri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
- (VPSLLWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSLLWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
- (VPSLLDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSLLDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
- (VPSLLQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
- (VPSLLQrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
- (VPSRLWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSRLWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
- (VPSRLDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSRLDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
- (VPSRLQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
- (VPSRLQrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
- (VPSRAWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSRAWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
- (VPSRADrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSRADrm VR128:$src1, addr:$src2)>;
}
let Predicates = [HasAVX2] in {
@@ -3937,60 +3861,6 @@ let Predicates = [HasAVX2] in {
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
-
- def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))),
- (VPSLLWYri VR256:$src1, imm:$src2)>;
- def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))),
- (VPSLLDYri VR256:$src1, imm:$src2)>;
- def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))),
- (VPSLLQYri VR256:$src1, imm:$src2)>;
-
- def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))),
- (VPSRLWYri VR256:$src1, imm:$src2)>;
- def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))),
- (VPSRLDYri VR256:$src1, imm:$src2)>;
- def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))),
- (VPSRLQYri VR256:$src1, imm:$src2)>;
-
- def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))),
- (VPSRAWYri VR256:$src1, imm:$src2)>;
- def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))),
- (VPSRADYri VR256:$src1, imm:$src2)>;
-
- def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))),
- (VPSLLWYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSLLWYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))),
- (VPSLLDYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSLLDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))),
- (VPSLLQYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))),
- (VPSLLQYrm VR256:$src1, addr:$src2)>;
-
- def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))),
- (VPSRLWYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSRLWYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))),
- (VPSRLDYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSRLDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))),
- (VPSRLQYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))),
- (VPSRLQYrm VR256:$src1, addr:$src2)>;
-
- def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))),
- (VPSRAWYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (VPSRAWYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))),
- (VPSRADYrr VR256:$src1, VR128:$src2)>;
- def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VPSRADYrm VR256:$src1, addr:$src2)>;
}
let Predicates = [HasSSE2] in {
@@ -4006,60 +3876,6 @@ let Predicates = [HasSSE2] in {
(PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
-
- def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (PSLLWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (PSLLDri VR128:$src1, imm:$src2)>;
- def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
- (PSLLQri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (PSRLWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (PSRLDri VR128:$src1, imm:$src2)>;
- def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
- (PSRLQri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
- (PSRAWri VR128:$src1, imm:$src2)>;
- def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
- (PSRADri VR128:$src1, imm:$src2)>;
-
- def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
- (PSLLWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PSLLWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
- (PSLLDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PSLLDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
- (PSLLQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
- (PSLLQrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
- (PSRLWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PSRLWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
- (PSRLDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PSRLDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
- (PSRLQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
- (PSRLQrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
- (PSRAWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PSRAWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
- (PSRADrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PSRADrm VR128:$src1, addr:$src2)>;
}
//===---------------------------------------------------------------------===//