diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-09-04 12:49:02 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-09-04 12:49:02 +0000 |
| commit | 32510207382720a97c682e098d45cb1371c48639 (patch) | |
| tree | 6d5f6007ba1667333625c3d331851a160cf3be10 /test | |
| parent | 7765492a7a7e6eab36bc43558ea7c1f91e57cfec (diff) | |
| download | external_llvm-32510207382720a97c682e098d45cb1371c48639.zip external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.gz external_llvm-32510207382720a97c682e098d45cb1371c48639.tar.bz2 | |
This patch optimizes code generation for a specific shuffle pattern: it now generates 2 instructions instead of 4.
Since this specific shuffle is widely used in many workloads, we see a ~10% performance improvement on them.
shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
Before this patch:
vmovaps (%rdx), %ymm0
vshufps $8, %ymm0, %ymm0, %ymm0
vmovaps (%rcx), %ymm1
vshufps $8, %ymm0, %ymm1, %ymm1
vunpcklps %ymm0, %ymm1, %ymm0

After this patch:
vmovaps (%rcx), %ymm0
vmovsldup (%rdx), %ymm1
vblendps $85, %ymm0, %ymm1, %ymm0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163134 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
| -rw-r--r-- | test/CodeGen/X86/avx-shuffle.ll | 10 |
1 files changed, 4 insertions, 6 deletions
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 9b41709..ec11654 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -229,9 +229,8 @@ define <8 x float> @test17(<4 x float> %y) {
 }
 
 ; CHECK: test18
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovshdup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
 }
 
 ; CHECK: test19
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovsldup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
