aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-06-19 00:37:31 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-06-19 00:37:31 +0000
commitbe4d595afd6e0d1a1c0a511dabf2c5a724bdf366 (patch)
tree5999f608fcf3bd19475aa6c2c52803ff10f08524
parent617e595022504652c083421dbb49a9c5d95aba23 (diff)
downloadexternal_llvm-be4d595afd6e0d1a1c0a511dabf2c5a724bdf366.zip
external_llvm-be4d595afd6e0d1a1c0a511dabf2c5a724bdf366.tar.gz
external_llvm-be4d595afd6e0d1a1c0a511dabf2c5a724bdf366.tar.bz2
Shrink down code and add for free AVX {MIN,MAX}P{S,D}{rm,rr} instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106366 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td87
-rw-r--r--test/MC/AsmParser/X86/x86_32-encoding.s32
-rw-r--r--test/MC/AsmParser/X86/x86_64-encoding.s32
3 files changed, 92 insertions, 59 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1c84cc7..88c4771 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -788,6 +788,24 @@ multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
defm V#NAME#SD : sse12_fp_scalar<opc,
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
OpNode, FR64, f64mem>, XD, VEX_4V;
+
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
+ OpSize, VEX_4V;
+
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
+
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -798,72 +816,23 @@ multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
defm SD : sse12_fp_scalar<opc,
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
OpNode, FR64, f64mem>, XD;
- }
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
+ f128mem, memopv4f32, SSEPackedSingle>, TB;
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+ "pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
+ f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse_xxx_ss
- let isCommutable = Commutable;
- }
+ "", "_ss", ssmem, sse_load_f32>, XS;
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- VR128:$src2))]> {
- // int_x86_sse2_xxx_sd
- let isCommutable = Commutable;
+ "2", "_sd", sdmem, sse_load_f64>, XD;
}
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
- !strconcat(OpcodeStr, "_ss")) VR128:$src1,
- sse_load_f32:$src2))]>;
- // int_x86_sse_xxx_ss
-
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
- !strconcat(OpcodeStr, "_sd")) VR128:$src1,
- sse_load_f64:$src2))]>;
- // int_x86_sse2_xxx_sd
-
// Vector intrinsic operation, reg+reg.
def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index b8b7c22..4e7eb41 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -10212,3 +10212,35 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: vmaxps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
+ vmaxps %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
+ vmaxpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vminps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
+ vminps %xmm2, %xmm4, %xmm6
+
+// CHECK: vminpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
+ vminpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
+ vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
+ vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
+ vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
+ vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 591274c..8c66024 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -264,3 +264,35 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: vmaxps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
+ vmaxps %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
+ vmaxpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vminps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
+ vminps %xmm10, %xmm14, %xmm12
+
+// CHECK: vminpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
+ vminpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
+ vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
+ vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
+ vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
+ vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+