diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-16 18:21:54 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-16 18:21:54 +0000 |
commit | 3b86598cfaa6fa820af67dc0999f2c59a3bcbe84 (patch) | |
tree | d04a06e8b961f98dc39226594425ee8a5b731e33 /test | |
parent | e26a1352487c350acaee62bc5f7fa4002b9bf524 (diff) | |
download | external_llvm-3b86598cfaa6fa820af67dc0999f2c59a3bcbe84.zip external_llvm-3b86598cfaa6fa820af67dc0999f2c59a3bcbe84.tar.gz external_llvm-3b86598cfaa6fa820af67dc0999f2c59a3bcbe84.tar.bz2 |
Instead of always leaving the work to the generic legalizer when
there is no support for native 256-bit shuffles, be more smart in some
cases, for example, when you can extract specific 128-bit parts and use
regular 128-bit shuffles for them. Example:
For this shuffle:
shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
<i32 1, i32 0, i32 7, i32 6>
This was expanded to:
vextractf128 $1, %ymm1, %xmm2
vpextrq $0, %xmm2, %rax
vmovd %rax, %xmm1
vpextrq $1, %xmm2, %rax
vmovd %rax, %xmm2
vpunpcklqdq %xmm1, %xmm2, %xmm1
vpextrq $0, %xmm0, %rax
vmovd %rax, %xmm2
vpextrq $1, %xmm0, %rax
vmovd %rax, %xmm0
vpunpcklqdq %xmm2, %xmm0, %xmm0
vinsertf128 $1, %xmm1, %ymm0, %ymm0
ret
Now we get:
vshufpd $1, %xmm0, %xmm0, %xmm0
vextractf128 $1, %ymm1, %xmm1
vshufpd $1, %xmm1, %xmm1, %xmm1
vinsertf128 $1, %xmm1, %ymm0, %ymm0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137733 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/avx-basic.ll | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll index 162f29d..1c10814 100644 --- a/test/CodeGen/X86/avx-basic.ll +++ b/test/CodeGen/X86/avx-basic.ll @@ -50,3 +50,46 @@ entry: %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4> ret <4 x i64> %shuffle } + +;;; +;;; Check that some 256-bit vectors are xformed into 128 ops +; CHECK: _A +; CHECK: vshufpd $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vshufpd $1 +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @A(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: vpunpckhqdq +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: movlhps +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @B(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 undef, i32 undef, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: movlhps +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: movlhps +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @C(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 undef, i32 0, i32 undef, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: vpshufd $-96 +; CHECK: vpshufd $-6 +; CHECK: vinsertf128 $1 +define <8 x i32> @D(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 10, i32 10, i32 11, i32 11> + ret <8 x i32> %shuffle +} + |