This patch optimizes a shuffle instruction: it now generates 2 instructions instead of 4.

Since this specific shuffle is widely used in many workloads, we see a ~10% performance gain on them.

Example IR:

    shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>

Code generated before this patch:

    vmovaps (%rdx), %ymm0
    vshufps $8, %ymm0, %ymm0, %ymm0
    vmovaps (%rcx), %ymm1
    vshufps $8, %ymm0, %ymm1, %ymm1
    vunpcklps %ymm0, %ymm1, %ymm0

Code generated after this patch:

    vmovaps (%rcx), %ymm0
    vmovsldup (%rdx), %ymm1
    vblendps $85, %ymm0, %ymm1, %ymm0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163134 91177308-0d34-0410-b5e6-96231b3b80d8
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-09-04 12:49:02 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-09-04 12:49:02 +0000
commit: 32510207382720a97c682e098d45cb1371c48639 (patch)
tree: 6d5f6007ba1667333625c3d331851a160cf3be10 /test
parent: 7765492a7a7e6eab36bc43558ea7c1f91e57cfec (diff)
download: external_llvm-32510207382720a97c682e098d45cb1371c48639.zip
external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.gz
external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.bz2
1 files changed, 4 insertions, 6 deletions
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 9b41709..ec11654 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -229,9 +229,8 @@ define   <8 x float> @test17(<4 x float> %y) {
 }
 
 ; CHECK: test18
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovshdup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
 }
 
 ; CHECK: test19
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovsldup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
author	Elena Demikhovsky <elena.demikhovsky@intel.com>	2012-09-04 12:49:02 +0000
committer	Elena Demikhovsky <elena.demikhovsky@intel.com>	2012-09-04 12:49:02 +0000
commit	32510207382720a97c682e098d45cb1371c48639 (patch)
tree	6d5f6007ba1667333625c3d331851a160cf3be10 /test
parent	7765492a7a7e6eab36bc43558ea7c1f91e57cfec (diff)
download	external_llvm-32510207382720a97c682e098d45cb1371c48639.zip external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.gz external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.bz2