aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-07-09 21:20:35 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2010-07-09 21:20:35 +0000
commit2bfb8f6ef8ac6970acfd90f4b93f5e58b7d2e62c (patch)
tree20946b0342585c63a85386d9a43be87b3563fbe3
parent8ea324093cd512acc37f7b5a60e511e64103699e (diff)
downloadexternal_llvm-2bfb8f6ef8ac6970acfd90f4b93f5e58b7d2e62c.zip
external_llvm-2bfb8f6ef8ac6970acfd90f4b93f5e58b7d2e62c.tar.gz
external_llvm-2bfb8f6ef8ac6970acfd90f4b93f5e58b7d2e62c.tar.bz2
Add AVX 256-bit unpack and interleave
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108017 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrSSE.td17
-rw-r--r--test/MC/AsmParser/X86/x86_32-encoding.s32
-rw-r--r--test/MC/AsmParser/X86/x86_64-encoding.s32
3 files changed, 81 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e873652..45b4801 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -165,6 +165,10 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+// FIXME: move this to a more appropriate place after all AVX is done.
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
@@ -1539,6 +1543,19 @@ let AddedComplexity = 10 in {
defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index e4bf29e..35ef1fa 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -12354,3 +12354,35 @@
// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
vmovupd %ymm1, (%eax)
+// CHECK: vunpckhps %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x15,0xe1]
+ vunpckhps %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x15,0xe1]
+ vunpckhpd %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklps %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x14,0xe1]
+ vunpcklps %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x14,0xe1]
+ vunpcklpd %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc]
+ vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc]
+ vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc]
+ vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc]
+ vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 452aa53..a875cbc 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -2428,3 +2428,35 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
vmovupd %ymm11, (%rax)
+// CHECK: vunpckhps %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3]
+ vunpckhps %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3]
+ vunpckhpd %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklps %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3]
+ vunpcklps %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3]
+ vunpcklpd %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc]
+ vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc]
+ vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc]
+ vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc]
+ vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+