diff options
author | Chris Lattner <sabre@nondot.org> | 2010-08-26 21:55:42 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-08-26 21:55:42 +0000 |
commit | e5a1426174986951a20ec6ffd119b77a24a65706 (patch) | |
tree | 99c5869da3d33eace7a8e473ac7333819a0aa805 /test/Transforms | |
parent | a6140a14444da1aa104f6ab7c40c5c920552f474 (diff) | |
download | external_llvm-e5a1426174986951a20ec6ffd119b77a24a65706.zip external_llvm-e5a1426174986951a20ec6ffd119b77a24a65706.tar.gz external_llvm-e5a1426174986951a20ec6ffd119b77a24a65706.tar.bz2 |
optimize bitcast(trunc(bitcast(x))) where the result is a float and 'x'
is a vector to be a vector element extraction. This allows clang to
compile:
struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }
into:
_foo: ## @foo
## BB#0: ## %entry
movd %xmm0, %rax
shrq $32, %rax
movd %eax, %xmm2
addss %xmm0, %xmm2
movapd %xmm1, %xmm3
addss %xmm2, %xmm3
movd %xmm1, %rax
shrq $32, %rax
movd %eax, %xmm0
addss %xmm3, %xmm0
ret
instead of:
_foo: ## @foo
## BB#0: ## %entry
movd %xmm0, %rax
movd %eax, %xmm0
shrq $32, %rax
movd %eax, %xmm2
addss %xmm0, %xmm2
movd %xmm1, %rax
movd %eax, %xmm1
addss %xmm2, %xmm1
shrq $32, %rax
movd %eax, %xmm0
addss %xmm1, %xmm0
ret
... eliminating half of the horribleness.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112227 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r-- | test/Transforms/InstCombine/bitcast.ll | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index 88fa9a4..c248b5e 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -13,3 +13,25 @@ define i32 @test1(i64 %a) { ; CHECK: ret i32 0 } +; Optimize bitcasts that are extracting low element of vector. This happens +; because of SRoA. +; rdar://7892780 +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} |