aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms
diff options
context:
space:
mode:
author Dale Johannesen <dalej@apple.com> 2009-01-14 02:35:31 +0000
committer Dale Johannesen <dalej@apple.com> 2009-01-14 02:35:31 +0000
commit 2f46bb8178e30e3b845859a44b57c048db06ef84 (patch)
tree 63629ab7e375c2e6c36e0cfff8de529a198bd06b /test/Transforms
parent 15b064d045bfb084772603623f469666f6fb8bb3 (diff)
download external_llvm-2f46bb8178e30e3b845859a44b57c048db06ef84.zip
external_llvm-2f46bb8178e30e3b845859a44b57c048db06ef84.tar.gz
external_llvm-2f46bb8178e30e3b845859a44b57c048db06ef84.tar.bz2
Fix the time regression I introduced in 464.h264ref with
my earlier patch to this file. The issue there was that all uses of an IV inside a loop are actually references to Base[IV*2], and there was one use outside that was the same, but LSR didn't see the base or the scaling because it didn't recurse into uses outside the loop; thus, it used base+IV*scale mode inside the loop instead of pulling base out of the loop. This was extra bad because register pressure later forced both base and IV into memory. Doing that recursion, at least enough to figure out addressing modes, is a good idea in general; the change in AddUsersIfInteresting does this. However, there were side effects. It is also possible for recursing outside the loop to introduce another IV where there was only one before (if the refs inside are not scaled and the ref outside is). I don't think this is a common case, but it's in the testsuite. It is right to be very aggressive about getting rid of such introduced IVs (CheckForIVReuse and the handling of nonzero RewriteFactor in StrengthReduceStridedIVUsers). In the testcase in question the new IV produced this way has both a nonconstant stride and a nonzero base, neither of which was handled before. And when inserting new code that feeds into a PHI, it's right to put such code at the original location rather than in the PHI's immediate predecessor(s) when the original location is outside the loop (a case that couldn't happen before; see RewriteInstructionToUseNewBase); better to avoid making multiple copies of it in this case. Also, the mechanism for keeping SCEVs corresponding to GEPs no longer works, as the GEP might change after its SCEV is remembered, invalidating the SCEV, and we might get a bad SCEV value when looking up the GEP again for a later loop. This also couldn't happen before, as we weren't recursing into GEPs outside the loop.
Also, when we build an expression that involves a (possibly non-affine) IV from a different loop as well as an IV from the one we're interested in (containsAddRecFromDifferentLoop), don't recurse into that. We can't do much with it and will get in trouble if we try to create new non-affine IVs from it. More testcases are coming.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62212 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r-- test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll 39
1 files changed, 39 insertions, 0 deletions
diff --git a/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
new file mode 100644
index 0000000..a707285
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep phi | count 1
+; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 1
+; ModuleID = '<stdin>'
+; Make sure examining a fuller expression outside the loop doesn't cause us to create a second
+; IV of stride %3.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+ %struct.anon = type { %struct.obj*, %struct.obj* }
+ %struct.obj = type { i16, i16, { %struct.anon } }
+@heap_size = external global i32 ; <i32*> [#uses=1]
+@"\01LC85" = external constant [39 x i8] ; <[39 x i8]*> [#uses=1]
+
+declare i32 @sprintf(i8*, i8*, ...) nounwind
+
+define %struct.obj* @gc_status(%struct.obj* %args) nounwind {
+entry:
+ br label %bb1.i
+
+bb.i2: ; preds = %bb2.i3
+ %indvar.next24 = add i32 %m.0.i, 1 ; <i32> [#uses=1]
+ br label %bb1.i
+
+bb1.i: ; preds = %bb.i2, %entry
+ %m.0.i = phi i32 [ 0, %entry ], [ %indvar.next24, %bb.i2 ] ; <i32> [#uses=4]
+ %0 = icmp slt i32 %m.0.i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb2.i3, label %nactive_heaps.exit
+
+bb2.i3: ; preds = %bb1.i
+ %1 = load %struct.obj** null, align 4 ; <%struct.obj*> [#uses=1]
+ %2 = icmp eq %struct.obj* %1, null ; <i1> [#uses=1]
+ br i1 %2, label %nactive_heaps.exit, label %bb.i2
+
+nactive_heaps.exit: ; preds = %bb2.i3, %bb1.i
+ %3 = load i32* @heap_size, align 4 ; <i32> [#uses=1]
+ %4 = mul i32 %3, %m.0.i ; <i32> [#uses=1]
+ %5 = sub i32 %4, 0 ; <i32> [#uses=1]
+ %6 = tail call i32 (i8*, i8*, ...)* @sprintf(i8* null, i8* getelementptr ([39 x i8]* @"\01LC85", i32 0, i32 0), i32 %m.0.i, i32 0, i32 %5, i32 0) nounwind ; <i32> [#uses=0]
+ ret %struct.obj* null
+}