Add support for 256-bit versions of VPERMIL instruction. This is a new

instruction introduced in AVX, which can operate on 128- and 256-bit vectors. It considers a 256-bit vector as two independent 128-bit lanes. It can permute any 32- or 64-bit elements inside a lane, and restricts the second lane to have the same permutation as the first one. With the improved splat support introduced earlier today, adding codegen for this instruction enables more efficient 256-bit code:

Instead of:
  vextractf128 $0, %ymm0, %xmm0
  punpcklbw %xmm0, %xmm0
  punpckhbw %xmm0, %xmm0
  vinsertf128 $0, %xmm0, %ymm0, %ymm1
  vinsertf128 $1, %xmm0, %ymm1, %ymm0
  vextractf128 $1, %ymm0, %xmm1
  shufps $1, %xmm1, %xmm1
  movss %xmm1, 28(%rsp)
  movss %xmm1, 24(%rsp)
  movss %xmm1, 20(%rsp)
  movss %xmm1, 16(%rsp)
  vextractf128 $0, %ymm0, %xmm0
  shufps $1, %xmm0, %xmm0
  movss %xmm0, 12(%rsp)
  movss %xmm0, 8(%rsp)
  movss %xmm0, 4(%rsp)
  movss %xmm0, (%rsp)
  vmovaps (%rsp), %ymm0

We get:
  vextractf128 $0, %ymm0, %xmm0
  punpcklbw %xmm0, %xmm0
  punpckhbw %xmm0, %xmm0
  vinsertf128 $0, %xmm0, %ymm0, %ymm1
  vinsertf128 $1, %xmm0, %ymm1, %ymm0
  vpermilps $85, %ymm0, %ymm0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135662 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> 2011-07-21 01:55:47 +0000
committer: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> 2011-07-21 01:55:47 +0000
commit: 65b74e1d00eef81b596b4c207fba069aa1eb8214 (patch)
tree: e4f8ca2a664f9674f641caee534c42d5d2053a01 /test
parent: 9283b668a1d0b41b83292e7b3577ec34a4ed2810 (diff)
download: external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.zip
external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.gz
external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.bz2
1 files changed, 16 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx-256-splat.ll b/test/CodeGen/X86/avx-256-splat.ll
new file mode 100644
index 0000000..27ff926
--- /dev/null
+++ b/test/CodeGen/X86/avx-256-splat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; FIXME: use avx versions for punpcklbw and punpckhbw
+
+; CHECK: vextractf128 $0
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: vinsertf128 $0
+; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $85
+define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <32 x i8> %shuffle
+}
+
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	2011-07-21 01:55:47 +0000
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	2011-07-21 01:55:47 +0000
commit	65b74e1d00eef81b596b4c207fba069aa1eb8214 (patch)
tree	e4f8ca2a664f9674f641caee534c42d5d2053a01 /test
parent	9283b668a1d0b41b83292e7b3577ec34a4ed2810 (diff)
download	external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.zip external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.gz external_llvm-65b74e1d00eef81b596b4c207fba069aa1eb8214.tar.bz2