aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-07-02 23:27:59 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-07-02 23:27:59 +0000
commit03560600b4fcaa9896b60fd2f224572a1945426b (patch)
treefbe3aed534445ab5c66d6a1d4795828539f2ff1e
parentff23535df64ffdfa73540669ea642f6b84221217 (diff)
downloadexternal_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.zip
external_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.tar.gz
external_llvm-03560600b4fcaa9896b60fd2f224572a1945426b.tar.bz2
Simple refactoring of SSE4.1 instructions, making room for the AVX forms
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107540 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td268
1 files changed, 117 insertions, 151 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 31cf196..c0efb78 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3904,7 +3904,7 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
-// SSE4.1 Instructions
+// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
@@ -3948,56 +3948,61 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
OpSize;
}
-let Constraints = "$src1 = $dst" in {
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
- Intrinsic F64Int> {
+ Intrinsic F64Int, bit Is2Addr = 1> {
// Intrinsic operation, reg.
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
// Intrinsic operation, mem.
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- OpSize;
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
// Intrinsic operation, reg.
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
// Intrinsic operation, mem.
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- OpSize;
-}
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
}
// FP round - roundss, roundps, roundsd, roundpd
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
@@ -4020,146 +4025,107 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
int_x86_sse41_phminposuw>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
- int_x86_sse41_pcmpeqq, 1>;
-defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
- int_x86_sse41_packusdw, 0>;
-defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
- int_x86_sse41_pminsb, 1>;
-defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
- int_x86_sse41_pminsd, 1>;
-defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
- int_x86_sse41_pminud, 1>;
-defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
- int_x86_sse41_pminuw, 1>;
-defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
- int_x86_sse41_pmaxsb, 1>;
-defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
- int_x86_sse41_pmaxsd, 1>;
-defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
- int_x86_sse41_pmaxud, 1>;
-defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
- int_x86_sse41_pmaxuw, 1>;
-
-defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
(PCMPEQQrm VR128:$src1, addr:$src2)>;
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
- SDNode OpNode, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
- VR128:$src2))]>, OpSize {
- let isCommutable = Commutable;
- }
- def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
- def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, (memop addr:$src2)))]>,
- OpSize;
- }
-}
-
/// SS48I_binop_rm - Simple SSE41 binary operator.
-let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
-}
+ OpSize;
}
-defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
- OpSize;
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
}
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
}
-defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
- int_x86_sse41_blendps, 0>;
-defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
- int_x86_sse41_blendpd, 0>;
-defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
- int_x86_sse41_pblendw, 0>;
-defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
- int_x86_sse41_dpps, 1>;
-defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
- int_x86_sse41_dppd, 1>;
-defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
- int_x86_sse41_mpsadbw, 0>;
-
-
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {