diff options
author | Dan Gohman <djg@cray.com> | 2007-10-22 20:40:42 +0000 |
---|---|---|
committer | Dan Gohman <djg@cray.com> | 2007-10-22 20:40:42 +0000 |
commit | 5766ac76b79aac7a8037ef7ac1e27af05cf68b5a (patch) | |
tree | dd1a099ed5fe939eef704a76816872cdf6323a71 /test/CodeGen | |
parent | 35b992207f18582b5339aa5493054b42d32b82ee (diff) | |
download | external_llvm-5766ac76b79aac7a8037ef7ac1e27af05cf68b5a.zip external_llvm-5766ac76b79aac7a8037ef7ac1e27af05cf68b5a.tar.gz external_llvm-5766ac76b79aac7a8037ef7ac1e27af05cf68b5a.tar.bz2 |
Strength reduction improvements.
- Avoid attempting stride-reuse in the case that there are users that
aren't addresses. In that case, there will be places where the
multiplications won't be folded away, so it's better to try to
strength-reduce them.
- Several SSE intrinsics have operands that strength-reduction can
treat as addresses. The previous item makes this more visible, as
any non-address use of an IV can inhibit stride-reuse.
- Make ValidStride aware of whether there's likely to be a base
register in the address computation. This prevents it from thinking
that things like stride 9 are valid on x86 when the base register is
already occupied.
Also, XFAIL the 2007-08-10-LEA16Use32.ll test; the new logic to avoid
stride-reuse eliminates the LEA in the loop, so the test is no longer
testing what it was intended to test.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43231 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/2007-08-10-LEA16Use32.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/stride-nine-with-base-reg.ll | 34 | ||||
-rw-r--r-- | test/CodeGen/X86/stride-reuse.ll | 30 |
3 files changed, 68 insertions, 0 deletions
diff --git a/test/CodeGen/X86/2007-08-10-LEA16Use32.ll b/test/CodeGen/X86/2007-08-10-LEA16Use32.ll index f6a8482..1a0bcf9 100644 --- a/test/CodeGen/X86/2007-08-10-LEA16Use32.ll +++ b/test/CodeGen/X86/2007-08-10-LEA16Use32.ll @@ -1,4 +1,8 @@ ; RUN: llvm-as < %s | llc -march=x86 | grep {leal} +; XFAIL: * +; This test is XFAIL'd because strength-reduction was improved to +; avoid emitting the lea, so it longer tests whether the 16-bit +; lea is avoided. @X = global i16 0 ; <i16*> [#uses=1] @Y = global i16 0 ; <i16*> [#uses=1] diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll new file mode 100644 index 0000000..f443c76 --- /dev/null +++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep lea | count 1 +; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea + +; For x86 there's an lea above the loop. In both cases, there shouldn't +; be any lea instructions inside the loop. + +@B = external global [1000 x i8], align 32 +@A = external global [1000 x i8], align 32 +@P = external global [1000 x i8], align 32 + +define void @foo(i32 %m, i32 %p) { +entry: + %tmp1 = icmp sgt i32 %m, 0 + br i1 %tmp1, label %bb, label %return + +bb: + %i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0 + %tmp3 = load i8* %tmp2, align 4 + %tmp4 = mul i8 %tmp3, 2 + %tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0 + store i8 %tmp4, i8* %tmp5, align 4 + %tmp8 = mul i32 %i.019.0, 9 + %tmp0 = add i32 %tmp8, %p + %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0 + store i8 17, i8* %tmp10, align 4 + %indvar.next = add i32 %i.019.0, 1 + %exitcond = icmp eq i32 %indvar.next, %m + br i1 %exitcond, label %return, label %bb + +return: + ret void +} + diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll new file mode 100644 index 0000000..97f33d8 --- /dev/null +++ 
b/test/CodeGen/X86/stride-reuse.ll @@ -0,0 +1,30 @@ +; RUN: llvm-as < %s | llc -march=x86 | not grep lea +; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea + +@B = external global [1000 x float], align 32 +@A = external global [1000 x float], align 32 +@P = external global [1000 x i32], align 32 + +define void @foo(i32 %m) { +entry: + %tmp1 = icmp sgt i32 %m, 0 + br i1 %tmp1, label %bb, label %return + +bb: + %i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0 + %tmp3 = load float* %tmp2, align 4 + %tmp4 = mul float %tmp3, 2.000000e+00 + %tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0 + store float %tmp4, float* %tmp5, align 4 + %tmp8 = shl i32 %i.019.0, 1 + %tmp9 = add i32 %tmp8, 64 + %tmp10 = getelementptr [1000 x i32]* @P, i32 0, i32 %i.019.0 + store i32 %tmp9, i32* %tmp10, align 4 + %indvar.next = add i32 %i.019.0, 1 + %exitcond = icmp eq i32 %indvar.next, %m + br i1 %exitcond, label %return, label %bb + +return: + ret void +} |