diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 24 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 97 |
3 files changed, 70 insertions, 63 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dc1fd7f..368dbc2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9302,6 +9302,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_hsub_pd_256: return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 86a2e2f..4b6e26b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -719,12 +719,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, - { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, - { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 }, + { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, + { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, - { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, - { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, @@ -903,12 +903,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, - { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, + { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 }, { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, - { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, - { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, + { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 }, { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, - { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 }, { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, @@ -1047,12 +1047,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 }, { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 }, - { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_32 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, - { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_32 }, - { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_32 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 }, { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, - { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_32 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 }, { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f3d08cb..51b9601 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5079,6 +5079,28 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +/// SS3I_binop_rm - Simple SSSE3 bin op +multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, + OpSize; + def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))]>, OpSize; +} + /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, bit Is2Addr = 1> { @@ -5118,16 +5140,16 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", - int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", - int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", - int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", - int_x86_ssse3_phsub_d_128, 0>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", @@ -5147,16 +5169,16 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", - int_x86_avx2_phadd_w>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", - int_x86_avx2_phadd_d>, VEX_4V; + defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", - int_x86_avx2_phsub_w>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", - int_x86_avx2_phsub_d>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", @@ -5177,16 +5199,16 @@ defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", - int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", - int_x86_ssse3_phadd_d_128>; + defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem>; + defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem>; + defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem>; + defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", - int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", - int_x86_ssse3_phsub_d_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", @@ -5216,15 +5238,6 @@ let Predicates = [HasAVX] in { (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBDrr128 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX2] in { @@ -5234,15 +5247,6 @@ let Predicates = [HasAVX2] in { (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; - - def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDDrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; } let Predicates = [HasSSSE3] in { @@ -5257,15 +5261,6 @@ let Predicates = [HasSSSE3] in { (PSIGNWrr128 VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (PSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBDrr128 VR128:$src1, VR128:$src2)>; } //===---------------------------------------------------------------------===// |