author     Dan Gohman <gohman@apple.com>  2010-02-12 10:34:29 +0000
committer  Dan Gohman <gohman@apple.com>  2010-02-12 10:34:29 +0000
commit     572645cf84060c0fc25cb91d38cb9079918b3a88 (patch)
tree       0571ce42ea03d210844a627baea045fa36f16df5 /test
parent     5cef638855c9f2bb23a9c181cc47ddace8551f50 (diff)
Reapply the new LoopStrengthReduction code, with compile time and bug fixes,
and with improved heuristics for analyzing foreign-loop addrecs. This change
also flattens IVUsers, eliminating the stride-oriented groupings, which makes
it easier to work with.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95975 91177308-0d34-0410-b5e6-96231b3b80d8
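For readers unfamiliar with the transformation these tests exercise, here is a
minimal C sketch (illustrative only, not part of this commit; the function names
are hypothetical) of what loop strength reduction does to a simple array-filling
loop: the per-iteration address computation becomes a pointer that is its own
induction variable, and the counter can be rewritten to count down to zero so
the exit test reuses the flags from the decrement instead of a separate compare.

```c
/* Illustrative sketch only -- not code from this commit. */

/* Before LSR: each iteration recomputes the address A + i, and both the
 * index i and the bound n stay live across the loop. */
void fill_before(int *A, int n, int v) {
  for (int i = 0; i < n; ++i)
    A[i] = v;
}

/* After LSR (conceptually): the address is strength-reduced to a pointer
 * increment, and the trip count counts down to zero, so the loop-exit
 * test no longer needs a register holding n. */
void fill_after(int *A, int n, int v) {
  int *p = A;
  for (int k = n; k != 0; --k)
    *p++ = v;
}
```

Several of the updated CHECK lines below (counting-down induction variables,
comparisons against zero, fused add/dec and branch) reflect this same pattern
at the machine-code level.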
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/ARM/arm-negative-stride.ll  26
-rw-r--r--  test/CodeGen/ARM/lsr-code-insertion.ll  4
-rw-r--r--  test/CodeGen/Thumb2/lsr-deficiency.ll  18
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ifcvt1.ll  12
-rw-r--r--  test/CodeGen/X86/2006-05-11-InstrSched.ll  4
-rw-r--r--  test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll  2
-rw-r--r--  test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll  2
-rw-r--r--  test/CodeGen/X86/full-lsr.ll  9
-rw-r--r--  test/CodeGen/X86/iv-users-in-other-loops.ll  8
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce-2.ll  19
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce-3.ll  13
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce.ll  13
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce4.ll  18
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce8.ll  8
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll  386
-rw-r--r--  test/CodeGen/X86/masked-iv-safe.ll  6
-rw-r--r--  test/CodeGen/X86/pr3495.ll  3
-rw-r--r--  test/Transforms/IndVarSimplify/addrec-gep.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll  5
-rw-r--r--  test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll  13
-rw-r--r--  test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll  4
-rw-r--r--  test/Transforms/LoopStrengthReduce/count-to-zero.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/invariant_value_first.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/ops_after_indvar.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/remove_indvar.ll  2
-rw-r--r--  test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll  2
28 files changed, 523 insertions, 66 deletions
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 72ec8ef..52ab871 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,7 +1,32 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; This loop is rewritten with an indvar which counts down, which
+; frees up a register from holding the trip count.
+
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
+; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
+ icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ store i32 %A, i32* %tmp2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
+ br i1 %1, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; This loop has a non-address use of the count-up indvar, so
+; it'll remain. Now the original store uses a negative-stride address.
+
+define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
+entry:
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
@@ -11,6 +36,7 @@ bb: ; preds = %bb, %entry
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
+ store i32 %indvar, i32* null
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 507ec2c..1bbb96d 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
-; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this:
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7b1b57a..ac2cd34 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -1,25 +1,29 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
; rdar://7387640
-; FIXME: We still need to rewrite array reference iv of stride -4 with loop
-; count iv of stride -1.
+; This now reduces to a single induction variable.
+
+; TODO: It still gets a GPR shuffle at the end of the loop
+; This is because something in instruction selection has decided
+; that comparing the pre-incremented value with zero is better
+; than comparing the post-incremented value with -4.
@G = external global i32 ; <i32*> [#uses=2]
@array = external global i32* ; <i32**> [#uses=1]
define arm_apcscc void @t() nounwind optsize {
; CHECK: t:
-; CHECK: mov.w r2, #4000
-; CHECK: movw r3, #1001
+; CHECK: mov.w r2, #1000
entry:
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
; CHECK: LBB1_1:
-; CHECK: subs r3, #1
-; CHECK: cmp r3, #0
-; CHECK: sub.w r2, r2, #4
+; CHECK: cmp r2, #0
+; CHECK: sub.w r9, r2, #1
+; CHECK: mov r2, r9
+
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index 71199ab..1d26756 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK: t1:
; CHECK: it ne
; CHECK: cmpne
@@ -20,12 +20,12 @@ cond_next:
}
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
-define i32 @t2(i32 %a, i32 %b) {
+define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t2:
-; CHECK: ite le
-; CHECK: suble
+; CHECK: ite gt
; CHECK: subgt
+; CHECK: suble
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
@@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
@x = external global i32* ; <i32**> [#uses=1]
-define void @foo(i32 %a) {
+define void @foo(i32 %a) nounwind {
entry:
%tmp = load i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
-define void @t3(i32 %a, i32 %b) {
+define void @t3(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t3:
; CHECK: it lt
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index bdbe713..56d6aa9 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN: grep {asm-printer} | grep 31
+; RUN: grep {asm-printer} | grep 34
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
- %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
+ %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp.upgrd.8, label %cond_true, label %return
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 4cac9b4..e1f8901 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
; RUN: grep push | count 3
-define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
+define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
entry:
icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %bb.preheader, label %return
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 721d4c9..8e315f4 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
; CHECK: %bb.i28.i
; CHECK: addl $2
-; CHECK: addl $2
+; CHECK: addl $-2
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 3bd58b6..ff9b1b0 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,12 +1,7 @@
; RUN: llc < %s -march=x86 >%t
-; TODO: Enhance full lsr mode to get this:
-; RUNX: grep {addl \\\$4,} %t | count 3
-; RUNX: not grep {,%} %t
-
-; For now, it should find this, which is still pretty good:
-; RUN: not grep {addl \\\$4,} %t
-; RUN: grep {,%} %t | count 6
+; RUN: grep {addl \\\$4,} %t | count 3
+; RUN: not grep {,%} %t
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
entry:
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index c695c29..408fb20 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=x86-64 -o %t
-; RUN: grep inc %t | count 1
+; RUN: not grep inc %t
; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 13
; RUN: not grep addb %t
-; RUN: grep leaq %t | count 9
-; RUN: grep leal %t | count 3
-; RUN: grep movq %t | count 5
+; RUN: not grep leaq %t
+; RUN: not grep leal %t
+; RUN: not grep movq %t
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index 30b5114..b546462 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,11 +1,24 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic | \
-; RUN: grep {, 4} | count 1
-; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
; It's safe to sink A in when it's not pic.
+; PIC: align
+; PIC: movl $4, -4([[REG:%e[a-z]+]])
+; PIC: movl $5, ([[REG]])
+; PIC: addl $4, [[REG]]
+; PIC: decl {{%e[[a-z]+}}
+; PIC: jne
+
+; STATIC: align
+; STATIC: movl $4, -4(%ecx)
+; STATIC: movl $5, (%ecx)
+; STATIC: addl $4, %ecx
+; STATIC: decl %eax
+; STATIC: jne
+
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
define void @test(i32 %row, i32 %N.in) nounwind {
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index 70c9134..b1c9fb9 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
-; RUN: grep {A+} | count 2
-;
-; Make sure the common loop invariant A is not hoisted up to preheader,
-; since it can be subsumed it into the addressing modes.
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
+
+; CHECK: align
+; CHECK: movl $4, -4(%ecx)
+; CHECK: movl $5, (%ecx)
+; CHECK: addl $4, %ecx
+; CHECK: decl %eax
+; CHECK: jne
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index 4cb56ca..42c6ac4 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | \
-; RUN: grep {A+} | count 2
-;
-; Make sure the common loop invariant A is not hoisted up to preheader,
-; since it can be subsumed into the addressing mode in all uses.
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
+
+; CHECK: align
+; CHECK: movl $4, -4(%ecx)
+; CHECK: movl $5, (%ecx)
+; CHECK: addl $4, %ecx
+; CHECK: decl %eax
+; CHECK: jne
@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 07e46ec..6c0eb8c 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,19 @@
-; RUN: llc < %s -march=x86 | grep cmp | grep 64
-; RUN: llc < %s -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+
+; By starting the IV at -64 instead of 0, a cmp is eliminated,
+; as the flags from the add can be used directly.
+
+; STATIC: movl $-64, %ecx
+
+; STATIC: movl %eax, _state+76(%ecx)
+; STATIC: addl $16, %ecx
+; STATIC: jne
+
+; In PIC mode the symbol can't be folded, so the change-compare-stride
+; trick applies.
+
+; PIC: cmpl $64
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index e14cd8a..6b2247d 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,4 +1,10 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CHECK: leal 16(%eax), %edx
+; CHECK: align
+; CHECK: addl $4, %edx
+; CHECK: decl %ecx
+; CHECK: jne LBB1_2
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
new file mode 100644
index 0000000..7f2b8cc
--- /dev/null
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -0,0 +1,386 @@
+; RUN: llc < %s -march=x86-64 -O3 | FileCheck %s
+target datalayout = "e-p:64:64:64"
+target triple = "x86_64-unknown-unknown"
+
+; Full strength reduction reduces register pressure from 5 to 4 here.
+; Instruction selection should use the FLAGS value from the dec for
+; the branch. Scheduling should push the adds upwards.
+
+; CHECK: full_me_0:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: addq $8, %rsi
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: addq $8, %rdx
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; Mostly-full strength reduction means we do full strength reduction on all
+; except for the offsets.
+;
+; Given a choice between constant offsets -2048 and 2048, choose the negative
+; value, because at boundary conditions it has a smaller encoding.
+; TODO: That's an over-general heuristic. It would be better for the target
+; to indicate what the encoding cost would be. Then using a 2048 offset
+; would be better on x86-64, since the start value would be 0 instead of
+; 2048.
+
+; CHECK: mostly_full_me_0:
+; CHECK: movsd -2048(%rsi), %xmm0
+; CHECK: mulsd -2048(%rdx), %xmm0
+; CHECK: movsd %xmm0, -2048(%rdi)
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: addq $8, %rsi
+; CHECK: divsd (%rdx), %xmm0
+; CHECK: addq $8, %rdx
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @mostly_full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %j = add i64 %i, 256
+ %Aj = getelementptr inbounds double* %A, i64 %j
+ %Bj = getelementptr inbounds double* %B, i64 %j
+ %Cj = getelementptr inbounds double* %C, i64 %j
+ %t3 = load double* %Bj
+ %t4 = load double* %Cj
+ %o = fdiv double %t3, %t4
+ store double %o, double* %Aj
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; A minor variation on mostly_full_me_0.
+; Prefer to start the indvar at 0.
+
+; CHECK: mostly_full_me_1:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: movsd -2048(%rsi), %xmm0
+; CHECK: addq $8, %rsi
+; CHECK: divsd -2048(%rdx), %xmm0
+; CHECK: addq $8, %rdx
+; CHECK: movsd %xmm0, -2048(%rdi)
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @mostly_full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %j = sub i64 %i, 256
+ %Aj = getelementptr inbounds double* %A, i64 %j
+ %Bj = getelementptr inbounds double* %B, i64 %j
+ %Cj = getelementptr inbounds double* %C, i64 %j
+ %t3 = load double* %Bj
+ %t4 = load double* %Cj
+ %o = fdiv double %t3, %t4
+ store double %o, double* %Aj
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; A slightly less minor variation on mostly_full_me_0.
+
+; CHECK: mostly_full_me_2:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: movsd -4096(%rsi), %xmm0
+; CHECK: addq $8, %rsi
+; CHECK: divsd -4096(%rdx), %xmm0
+; CHECK: addq $8, %rdx
+; CHECK: movsd %xmm0, -4096(%rdi)
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @mostly_full_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %k = add i64 %i, 256
+ %Ak = getelementptr inbounds double* %A, i64 %k
+ %Bk = getelementptr inbounds double* %B, i64 %k
+ %Ck = getelementptr inbounds double* %C, i64 %k
+ %t1 = load double* %Bk
+ %t2 = load double* %Ck
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ak
+ %j = sub i64 %i, 256
+ %Aj = getelementptr inbounds double* %A, i64 %j
+ %Bj = getelementptr inbounds double* %B, i64 %j
+ %Cj = getelementptr inbounds double* %C, i64 %j
+ %t3 = load double* %Bj
+ %t4 = load double* %Cj
+ %o = fdiv double %t3, %t4
+ store double %o, double* %Aj
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; In this test, the counting IV exit value is used, so full strength reduction
+; would not reduce register pressure. IndVarSimplify ought to simplify such
+; cases away, but it's useful here to verify that LSR's register pressure
+; heuristics are working as expected.
+
+; CHECK: count_me_0:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ ret i64 %q
+}
+
+; In this test, the trip count value is used, so full strength reduction
+; would not reduce register pressure.
+; (though it would reduce register pressure inside the loop...)
+
+; CHECK: count_me_1:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %n, %loop ]
+ ret i64 %q
+}
+
+; Full strength reduction doesn't save any registers here because the
+; loop tripcount is a constant.
+
+; CHECK: count_me_2:
+; CHECK: movl $10, %eax
+; CHECK: align
+; CHECK: BB7_1:
+; CHECK: movsd -40(%rdi,%rax,8), %xmm0
+; CHECK: addsd -40(%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, -40(%rdx,%rax,8)
+; CHECK: movsd (%rdi,%rax,8), %xmm0
+; CHECK: subsd (%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdx,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq $5010, %rax
+; CHECK: jne
+
+define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %i5 = add i64 %i, 5
+ %Ai = getelementptr double* %A, i64 %i5
+ %t2 = load double* %Ai
+ %Bi = getelementptr double* %B, i64 %i5
+ %t4 = load double* %Bi
+ %t5 = fadd double %t2, %t4
+ %Ci = getelementptr double* %C, i64 %i5
+ store double %t5, double* %Ci
+ %i10 = add i64 %i, 10
+ %Ai10 = getelementptr double* %A, i64 %i10
+ %t9 = load double* %Ai10
+ %Bi10 = getelementptr double* %B, i64 %i10
+ %t11 = load double* %Bi10
+ %t12 = fsub double %t9, %t11
+ %Ci10 = getelementptr double* %C, i64 %i10
+ store double %t12, double* %Ci10
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 5000
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; This should be fully strength-reduced to reduce register pressure.
+
+; CHECK: full_me_1:
+; CHECK: align
+; CHECK: BB8_1:
+; CHECK: movsd (%rdi), %xmm0
+; CHECK: addsd (%rsi), %xmm0
+; CHECK: movsd %xmm0, (%rdx)
+; CHECK: movsd 40(%rdi), %xmm0
+; CHECK: addq $8, %rdi
+; CHECK: subsd 40(%rsi), %xmm0
+; CHECK: addq $8, %rsi
+; CHECK: movsd %xmm0, 40(%rdx)
+; CHECK: addq $8, %rdx
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %i5 = add i64 %i, 5
+ %Ai = getelementptr double* %A, i64 %i5
+ %t2 = load double* %Ai
+ %Bi = getelementptr double* %B, i64 %i5
+ %t4 = load double* %Bi
+ %t5 = fadd double %t2, %t4
+ %Ci = getelementptr double* %C, i64 %i5
+ store double %t5, double* %Ci
+ %i10 = add i64 %i, 10
+ %Ai10 = getelementptr double* %A, i64 %i10
+ %t9 = load double* %Ai10
+ %Bi10 = getelementptr double* %B, i64 %i10
+ %t11 = load double* %Bi10
+ %t12 = fsub double %t9, %t11
+ %Ci10 = getelementptr double* %C, i64 %i10
+ store double %t12, double* %Ci10
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; This is a variation on full_me_0 in which the 0,+,1 induction variable
+; has a non-address use, pinning that value in a register.
+
+; CHECK: count_me_3:
+; CHECK: call
+; CHECK: movsd (%r15,%r13,8), %xmm0
+; CHECK: mulsd (%r14,%r13,8), %xmm0
+; CHECK: movsd %xmm0, (%r12,%r13,8)
+; CHECK: incq %r13
+; CHECK: cmpq %r13, %rbx
+; CHECK: jne
+
+declare void @use(i64)
+
+define void @count_me_3(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ call void @use(i64 %i)
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index bc493bd..0b4d73a 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -169,7 +169,7 @@ loop:
%indvar.i24 = and i64 %indvar, 16777215
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = fmul double %t4, 2.3
+ %t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
@@ -199,7 +199,7 @@ loop:
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = fmul double %t4, 2.3
+ %t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
@@ -229,7 +229,7 @@ loop:
%indvar.i24 = ashr i64 %s1, 24
%t3 = getelementptr double* %d, i64 %indvar.i24
%t4 = load double* %t3
- %t5 = fmul double %t4, 2.3
+ %t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
%t6 = getelementptr double* %d, i64 %indvar
%t7 = load double* %t6
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index 14f2a54..e84a84f 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,8 +1,7 @@
; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
-; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 37
+; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34
; PR3495
-; The loop reversal kicks in once here, resulting in one fewer instruction.
target triple = "i386-pc-linux-gnu"
@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/IndVarSimplify/addrec-gep.ll
index 9e42734..345f666 100644
--- a/test/Transforms/IndVarSimplify/addrec-gep.ll
+++ b/test/Transforms/IndVarSimplify/addrec-gep.ll
@@ -25,7 +25,7 @@ bb1: ; preds = %bb2, %bb.nph
%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
%tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
%tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
- %z0 = add i64 %tmp4, 5203
+ %z0 = add i64 %tmp3, 5203
%tmp5 = getelementptr double* %p, i64 %z0 ; <double*> [#uses=1]
%tmp6 = load double* %tmp5, align 8 ; <double> [#uses=1]
%tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
index 7c7a21c..99cb856 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep ugt
-; PR2535
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
@.str = internal constant [4 x i8] c"%d\0A\00"
@@ -16,7 +15,7 @@ forbody:
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
- %cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
+ %cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
afterfor: ; preds = %forcond
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
index 36941ad..1f7f6ec 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
@@ -1,10 +1,15 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
+; RUN: llc < %s -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
-; This is like change-compare-stride-trickiness-1.ll except the comparison
-; happens before the relevant use, so the comparison stride can't be
-; easily changed.
+; The comparison happens before the relevant use, but it can still be rewritten
+; to compare with zero.
+
+; CHECK: foo:
+; CHECK: align
+; CHECK: incl %eax
+; CHECK-NEXT: decl %ecx
+; CHECK-NEXT: jne
define void @foo() nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index ea8a259..cb63809 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,10 +1,12 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$10}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
; The comparison happens after the relevant use, so the stride can easily
; be changed. The comparison can be done in a narrower mode than the
; induction variable.
+; TODO: By making the first store post-increment as well, the loop setup
+; could be made simpler.
define void @foo() nounwind {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/count-to-zero.ll b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
index 8cc3b5c..feb79f8 100644
--- a/test/Transforms/LoopStrengthReduce/count-to-zero.ll
+++ b/test/Transforms/LoopStrengthReduce/count-to-zero.ll
@@ -19,7 +19,7 @@ bb3: ; preds = %bb1
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
-; CHECK: sub i32 %lsr.iv, 1
+; CHECK: add i32 %lsr.iv, -1
br label %bb6
bb6: ; preds = %bb3, %entry
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
index f86638b..4094e9c 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
@@ -1,5 +1,5 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: opt < %s -loop-reduce -S | \
+; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
index 37acf0f..e2aed78 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
@@ -1,5 +1,5 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: opt < %s -loop-reduce -S | \
+; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
index a032cc9..410d88f 100644
--- a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
+++ b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
@@ -1,7 +1,7 @@
; Check that this test makes INDVAR and related stuff dead, because P[indvar]
; gets reduced, making INDVAR dead.
-; RUN: opt < %s -loop-reduce -S | not grep INDVAR
+; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | not grep INDVAR
declare i1 @pred()
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index c91f5cd..8959c17 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users | grep {Stride i64 {3,+,2}<%loop>:}
+; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)}
; The value of %r is dependent on a polynomial iteration expression.
diff --git a/test/Transforms/LoopStrengthReduce/remove_indvar.ll b/test/Transforms/LoopStrengthReduce/remove_indvar.ll
index 53f4b9d..bb39532 100644
--- a/test/Transforms/LoopStrengthReduce/remove_indvar.ll
+++ b/test/Transforms/LoopStrengthReduce/remove_indvar.ll
@@ -7,10 +7,12 @@ define void @test(i32* %P) {
; <label>:0
br label %Loop
Loop: ; preds = %Loop, %0
+ %i = phi i32 [ 0, %0 ], [ %i.next, %Loop ]
%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ] ; <i32> [#uses=2]
%STRRED = getelementptr i32* %P, i32 %INDVAR ; <i32*> [#uses=1]
store i32 0, i32* %STRRED
%INDVAR2 = add i32 %INDVAR, 1 ; <i32> [#uses=1]
+ %i.next = add i32 %i, 1
%cond = call i1 @pred( ) ; <i1> [#uses=1]
br i1 %cond, label %Loop, label %Out
Out: ; preds = %Loop
diff --git a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
index a99a823..5ed37dd 100644
--- a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
+++ b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -loop-reduce -S | \
-; RUN: grep {add i32 %lsr.iv.next, 1}
+; RUN: grep {add i32 %indvar630.ui, 1}
;
; Make sure that the use of the IV outside of the loop (the store) uses the
; post incremented value of the IV, not the preincremented value. This