diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-07-02 23:27:59 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2010-07-02 23:27:59 +0000 |
commit | 03560600b4fcaa9896b60fd2f224572a1945426b (patch) | |
tree | fbe3aed534445ab5c66d6a1d4795828539f2ff1e | |
parent | ff23535df64ffdfa73540669ea642f6b84221217 (diff) | |
download | external_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.zip external_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.tar.gz external_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.tar.bz2 |
Simple refactoring of SSE4.1 instructions, making room for the AVX forms
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107540 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 268 |
1 files changed, 117 insertions, 151 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 31cf196..c0efb78 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3904,7 +3904,7 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// -// SSE4.1 Instructions +// SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, @@ -3948,56 +3948,61 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, OpSize; } -let Constraints = "$src1 = $dst" in { multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, string OpcodeStr, Intrinsic F32Int, - Intrinsic F64Int> { + Intrinsic F64Int, bit Is2Addr = 1> { // Intrinsic operation, reg. def SSr_Int : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; // Intrinsic operation, mem. def SSm_Int : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, - OpSize; + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, + OpSize; // Intrinsic operation, reg. def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; // Intrinsic operation, mem. def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, - OpSize; -} + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, + OpSize; } // FP round - roundss, roundps, roundsd, roundpd defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; @@ -4020,146 +4025,107 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", int_x86_sse41_phminposuw>; /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } +multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", - int_x86_sse41_pcmpeqq, 1>; -defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", - int_x86_sse41_packusdw, 0>; -defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", - int_x86_sse41_pminsb, 1>; -defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", - int_x86_sse41_pminsd, 1>; -defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", - int_x86_sse41_pminud, 1>; -defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", - int_x86_sse41_pminuw, 1>; -defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", - int_x86_sse41_pmaxsb, 1>; -defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", - int_x86_sse41_pmaxsd, 1>; -defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", - int_x86_sse41_pmaxud, 1>; -defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", - int_x86_sse41_pmaxuw, 1>; - -defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; + defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; + defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; + defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; + defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; + defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; + defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; + defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; + defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), (PCMPEQQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), (PCMPEQQrm VR128:$src1, addr:$src2)>; -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT, - SDNode OpNode, Intrinsic IntId128, - bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode (OpVT VR128:$src1), - VR128:$src2))]>, OpSize { - let isCommutable = Commutable; - } - def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; - def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, (memop addr:$src2)))]>, - OpSize; - } -} - /// SS48I_binop_rm - Simple SSE41 binary operator. -let Constraints = "$src1 = $dst" in { multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { + ValueType OpVT, bit Is2Addr = 1> { + let isCommutable = 1 in def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, - OpSize { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, + OpSize; def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpNode VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2))))]>, - OpSize; -} + OpSize; } -defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; +let Constraints = "$src1 = $dst" in + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, + OpSize; +} + let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize { - let isCommutable = Commutable; - } - def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, - OpSize; + let isCommutable = 0 in { + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; } + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; } -defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", - int_x86_sse41_blendps, 0>; -defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", - int_x86_sse41_blendpd, 0>; -defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", - int_x86_sse41_pblendw, 0>; -defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", - int_x86_sse41_dpps, 1>; -defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", - int_x86_sse41_dppd, 1>; -defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 0>; - - /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { |