aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp12
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp24
-rw-r--r--lib/Target/X86/X86InstrSSE.td97
3 files changed, 70 insertions, 63 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dc1fd7f..368dbc2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9302,6 +9302,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_hsub_pd_256:
return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 86a2e2f..4b6e26b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -719,12 +719,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
{ X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
- { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 },
- { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
{ X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
- { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
{ X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
- { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
{ X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
@@ -903,12 +903,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
{ X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
- { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
{ X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
- { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 },
- { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
{ X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
- { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
{ X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
{ X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
@@ -1047,12 +1047,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
{ X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
{ X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
- { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_32 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
{ X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
- { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_32 },
- { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_32 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
{ X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
- { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_32 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
{ X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f3d08cb..51b9601 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5079,6 +5079,28 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+/// SS3I_binop_rm - Simple SSSE3 bin op
+multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
+ OpSize;
+ def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
+}
+
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Is2Addr = 1> {
@@ -5118,16 +5140,16 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw",
- int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd",
- int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
- int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
- int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
@@ -5147,16 +5169,16 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw",
- int_x86_avx2_phadd_w>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd",
- int_x86_avx2_phadd_d>, VEX_4V;
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
- int_x86_avx2_phsub_w>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
- int_x86_avx2_phsub_d>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
@@ -5177,16 +5199,16 @@ defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw",
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd",
- int_x86_ssse3_phadd_d_128>;
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
- int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
@@ -5216,15 +5238,6 @@ let Predicates = [HasAVX] in {
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
- (VPHADDWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
- (VPHADDDrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
- (VPHSUBWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
- (VPHSUBDrr128 VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasAVX2] in {
@@ -5234,15 +5247,6 @@ let Predicates = [HasAVX2] in {
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)),
(VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)),
- (VPHADDWrr256 VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)),
- (VPHADDDrr256 VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)),
- (VPHSUBWrr256 VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)),
- (VPHSUBDrr256 VR256:$src1, VR256:$src2)>;
}
let Predicates = [HasSSSE3] in {
@@ -5257,15 +5261,6 @@ let Predicates = [HasSSSE3] in {
(PSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
- (PHADDWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
- (PHADDDrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
- (PHSUBWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
- (PHSUBDrr128 VR128:$src1, VR128:$src2)>;
}
//===---------------------------------------------------------------------===//