diff options
author | Chris Lattner <sabre@nondot.org> | 2009-09-21 05:57:11 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-09-21 05:57:11 +0000 |
commit | eed919b1ba4c6186258b4beb951625703c1c568e (patch) | |
tree | 04ebb710860897731aa87703d46323d4f2517cdf /test/Transforms/GVN/rle.ll | |
parent | 0c1f688954a087017bcaa0d4e88c1ebd64f11c45 (diff) | |
download | external_llvm-eed919b1ba4c6186258b4beb951625703c1c568e.zip external_llvm-eed919b1ba4c6186258b4beb951625703c1c568e.tar.gz external_llvm-eed919b1ba4c6186258b4beb951625703c1c568e.tar.bz2 |
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/GVN/rle.ll')
-rw-r--r-- | test/Transforms/GVN/rle.ll | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 503a5bb..736a7e4 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -141,6 +141,35 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
+;; bitcast equivalence can be properly phi translated.
+define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
+  %P2 = bitcast i32* %P to float*
+  br i1 %cond, label %T, label %F
+T:
+  store i32 42, i32* %P
+  br label %Cont
+
+F:
+  store float 1.0, float* %P2
+  br label %Cont
+
+Cont:
+  %P3 = bitcast i32* %P to i8*
+  %A = load i8* %P3
+  ret i8 %A
+
+;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
+;; bootstrap, see r82411
+;
+; HECK: @coerce_mustalias_nonlocal1
+; HECK: Cont:
+; HECK:   %A = phi i8 [
+; HECK-NOT: load
+; HECK: ret i8 %A
+}
+
+
 ;; non-local i32 -> i8 partial redundancy load forwarding.
 define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
   %P3 = bitcast i32* %P to i8*
@@ -165,3 +194,24 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+;;===----------------------------------------------------------------------===;;
+;; Store -> Load and Load -> Load forwarding where src and dst are different
+;; types, and the reload is an offset from the store pointer.
+;;===----------------------------------------------------------------------===;;
+
+;; i32 -> f32 forwarding.
+define i8 @coerce_offset0(i32 %V, i32* %P) {
+  store i32 %V, i32* %P
+
+  %P2 = bitcast i32* %P to i8*
+  %P3 = getelementptr i8* %P2, i32 2
+
+  %A = load i8* %P3
+  ret i8 %A
+; CHECK: @coerce_offset0
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+
+