13 files changed, 642 insertions, 21 deletions
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index af9f1b3..64fef10 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
-; add219 should be extended to i64 because it is nsw, even though its
+; %add, %sub and %mul should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -18,13 +18,26 @@ for.body153:                                      ; preds = %for.body153, %for.b
   br i1 undef, label %for.body170, label %for.body153
 
 ; CHECK: add nsw i64 %indvars.iv, 1
+; CHECK: sub nsw i64 %indvars.iv, 2
+; CHECK: sub nsw i64 4, %indvars.iv
+; CHECK: mul nsw i64 %indvars.iv, 8
 for.body170:                                      ; preds = %for.body170, %for.body153
   %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
-  %add219 = add nsw i32 %i2.19, 1
-  %idxprom220 = sext i32 %add219 to i64
+
+  %add = add nsw i32 %i2.19, 1
+  %add.idxprom = sext i32 %add to i64
+
+  %sub = sub nsw i32 %i2.19, 2
+  %sub.idxprom = sext i32 %sub to i64
+
+  %sub.neg = sub nsw i32 4, %i2.19
+  %sub.neg.idxprom = sext i32 %sub.neg to i64
+
+  %mul = mul nsw i32 %i2.19, 8
+  %mul.idxprom = sext i32 %mul to i64
+
   %add249 = add nsw i32 %i2.19, %shl132
   br label %for.body170
-
 for.end285:                                       ; preds = %entry
   ret void
 }
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index a8020e6..e462712 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin"
 
 ; CHECK-LABEL: @test(
 ; CHECK: if.end.i126:
-; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+; CHECK: %exitcond = icmp ne i8* %destYPixelPtr.010.i, getelementptr (i8* null, i32 undef)
 define void @test() nounwind {
 entry:
   br label %while.cond
diff --git a/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
new file mode 100644
index 0000000..2cb98eb
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
new file mode 100644
index 0000000..8744b19
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target triple = "nvptx64-unknown-unknown"
+
+; For the nvptx64 architecture, the cost of an arithmetic instruction on a
+; 64-bit integer is twice as expensive as that on a 32-bit integer, because the
+; hardware needs to simulate a 64-bit integer using two 32-bit integers.
+; Therefore, in this particular architecture, we should not widen induction
+; variables to 64-bit integers even though i64 is a legal type in the 64-bit
+; PTX ISA.
+
+define void @indvar_32_bit(i32 %n, i32* nocapture %output) {
+; CHECK-LABEL: @indvar_32_bit
+entry:
+  %cmp5 = icmp sgt i32 %n, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ]
+; CHECK: phi i32
+  %mul = mul nsw i32 %i.06, %i.06
+  %0 = sext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds i32* %output, i64 %0
+  store i32 %mul, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.06, 3
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
index e4c31d1..9e55a17 100644
--- a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@ entry:
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@ entry:
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index 4736f85..f12c68c 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -21,7 +21,7 @@ for.end:                                          ; preds = %for.body
 
 ; Check that post-incrementing the backedge taken count does not overflow.
 ; CHECK-LABEL: @postinc(
-; CHECK: icmp eq i32 %indvars.iv.next, 256
+; CHECK: icmp eq i32 %indvars.iv, 255
 define i32 @postinc() #0 {
 entry:
   br label %do.body
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 1fdcdd1..efb96bd 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -82,15 +82,23 @@ exit:
 ; Perform LFTR without generating extra preheader code.
 define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
                          i32 %irow, i32 %ilead) nounwind {
-; CHECK: entry:
-; CHECK-NOT: zext
-; CHECK-NOT: add
-; CHECK: loop:
-; CHECK: phi i64
-; CHECK: phi i64
+; CHECK-LABEL: @guardedloop(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i32 1, %irow
+; CHECK-NEXT: br i1 %[[cmp]], label %[[loop_preheader:.*]], label %[[return:.*]]
+
+; CHECK: [[loop_preheader]]:
+; CHECK-NEXT: %[[sext:.*]] = sext i32 %ilead to i64
+; CHECK-NEXT: %[[add:.*]] = add i32 %irow, -1
+; CHECK-NEXT: br label %[[loop:.*]]
+
+; CHECK: [[loop]]:
+; CHECK-NEXT: %[[indvars_iv2:.*]] = phi i64
+; CHECK-NEXT: phi i64
 ; CHECK-NOT: phi
-; CHECK: icmp ne
-; CHECK: br i1
+; CHECK: %[[lftr_wideiv:.*]] = trunc i64 %[[indvars_iv2]] to i32
+; CHECK-NEXT: %[[exitcond:.*]] = icmp ne i32 %[[lftr_wideiv]], %[[add]]
+; CHECK-NEXT: br i1 %[[exitcond]], label %[[loop]], label
 entry:
   %cmp = icmp slt i32 1, %irow
   br i1 %cmp, label %loop, label %return
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 0576692..a7023f2 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -229,10 +229,11 @@ entry:
 ; loop and the OR instruction is replaced by an ADD keeping the result
 ; equivalent.
 ;
+; CHECK: sext
 ; CHECK: loop:
 ; CHECK: phi i64
 ; CHECK-NOT: sext
-; CHECK: icmp slt i32
+; CHECK: icmp slt i64
 ; CHECK: exit:
 ; CHECK: add i64
 loop:
diff --git a/test/Transforms/IndVarSimplify/pr20680.ll b/test/Transforms/IndVarSimplify/pr20680.ll
new file mode 100644
index 0000000..88a7fd7
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr20680.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry:
+; CHECK: br label %[[for_cond2_preheader:.*]]
+
+; CHECK: [[for_cond2_preheader]]:
+; CHECK-NEXT: %[[indvars_iv:.*]] = phi i32 [ %[[indvars_iv_next:.*]], %[[for_inc13:.*]] ], [ -14, %entry ]
+; br i1 {{.*}}, label %[[for_inc13]], label %
+entry:
+  %0 = load i32* @a, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool = icmp eq i32 %1, 0
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.inc13, %entry
+  %storemerge15 = phi i8 [ -14, %entry ], [ %inc14, %for.inc13 ]
+  br i1 %tobool2, label %for.inc13, label %for.body3.lr.ph
+
+for.body3.lr.ph:                                  ; preds = %for.cond2.preheader
+  %tobool5 = icmp eq i8 %storemerge15, 0
+  %conv7 = sext i8 %storemerge15 to i32
+  %2 = add nsw i32 %conv7, 1
+  %3 = icmp ult i32 %2, 3
+  %div = select i1 %3, i32 %conv7, i32 0
+  br i1 %tobool5, label %for.body3.lr.ph.split.us, label %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+
+for.body3.lr.ph.for.body3.lr.ph.split_crit_edge:  ; preds = %for.body3.lr.ph
+  br label %for.body3.lr.ph.split
+
+for.body3.lr.ph.split.us:                         ; preds = %for.body3.lr.ph
+  br i1 %tobool, label %for.body3.lr.ph.split.us.split.us, label %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+
+for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge: ; preds = %for.body3.lr.ph.split.us
+  br label %for.body3.lr.ph.split.us.split
+
+for.body3.lr.ph.split.us.split.us:                ; preds = %for.body3.lr.ph.split.us
+  br label %for.body3.us.us
+
+for.body3.us.us:                                  ; preds = %for.cond2.loopexit.us.us, %for.body3.lr.ph.split.us.split.us
+  br i1 true, label %cond.false.us.us, label %cond.end.us.us
+
+cond.false.us.us:                                 ; preds = %for.body3.us.us
+  br label %cond.end.us.us
+
+cond.end.us.us:                                   ; preds = %cond.false.us.us, %for.body3.us.us
+  %cond.us.us = phi i32 [ %div, %cond.false.us.us ], [ %conv7, %for.body3.us.us ]
+  %4 = load i32* @b, align 4
+  %cmp91.us.us = icmp slt i32 %4, 1
+  br i1 %cmp91.us.us, label %for.inc.lr.ph.us.us, label %for.cond2.loopexit.us.us
+
+for.cond2.loopexit.us.us:                         ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us.us, %cond.end.us.us
+  br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us, label %for.body3.us.us
+
+for.inc.lr.ph.us.us:                              ; preds = %cond.end.us.us
+  br label %for.inc.us.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us.us:     ; preds = %for.inc.us.us
+  %inc.lcssa.us.us = phi i32 [ %inc.us.us, %for.inc.us.us ]
+  store i32 %inc.lcssa.us.us, i32* @b, align 4
+  br label %for.cond2.loopexit.us.us
+
+for.inc.us.us:                                    ; preds = %for.inc.us.us, %for.inc.lr.ph.us.us
+  %5 = phi i32 [ %4, %for.inc.lr.ph.us.us ], [ %inc.us.us, %for.inc.us.us ]
+  %inc.us.us = add nsw i32 %5, 1
+  %cmp9.us.us = icmp slt i32 %inc.us.us, 1
+  br i1 %cmp9.us.us, label %for.inc.us.us, label %for.cond8.for.cond2.loopexit_crit_edge.us.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us: ; preds = %for.cond2.loopexit.us.us
+  %cond.lcssa.ph.us.ph.us = phi i32 [ %cond.us.us, %for.cond2.loopexit.us.us ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.body3.lr.ph.split.us.split:                   ; preds = %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+  br label %for.body3.us
+
+for.body3.us:                                     ; preds = %for.cond2.loopexit.us, %for.body3.lr.ph.split.us.split
+  br i1 true, label %cond.false.us, label %cond.end.us
+
+cond.false.us:                                    ; preds = %for.body3.us
+  br label %cond.end.us
+
+cond.end.us:                                      ; preds = %cond.false.us, %for.body3.us
+  %cond.us = phi i32 [ %div, %cond.false.us ], [ %conv7, %for.body3.us ]
+  %6 = load i32* @b, align 4
+  %cmp91.us = icmp slt i32 %6, 1
+  br i1 %cmp91.us, label %for.inc.lr.ph.us, label %for.cond2.loopexit.us
+
+for.inc.us:                                       ; preds = %for.inc.lr.ph.us, %for.inc.us
+  %7 = phi i32 [ %6, %for.inc.lr.ph.us ], [ %inc.us, %for.inc.us ]
+  %inc.us = add nsw i32 %7, 1
+  %cmp9.us = icmp slt i32 %inc.us, 1
+  br i1 %cmp9.us, label %for.inc.us, label %for.cond8.for.cond2.loopexit_crit_edge.us
+
+for.cond2.loopexit.us:                            ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us, %cond.end.us
+  br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, label %for.body3.us
+
+for.inc.lr.ph.us:                                 ; preds = %cond.end.us
+  br label %for.inc.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us:        ; preds = %for.inc.us
+  %inc.lcssa.us = phi i32 [ %inc.us, %for.inc.us ]
+  store i32 %inc.lcssa.us, i32* @b, align 4
+  br label %for.cond2.loopexit.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa: ; preds = %for.cond2.loopexit.us
+  %cond.lcssa.ph.us.ph = phi i32 [ %cond.us, %for.cond2.loopexit.us ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us:        ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us
+  %cond.lcssa.ph.us = phi i32 [ %cond.lcssa.ph.us.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa ], [ %cond.lcssa.ph.us.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us ]
+  br label %for.cond2.for.inc13_crit_edge
+
+for.body3.lr.ph.split:                            ; preds = %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+  br i1 %tobool, label %for.body3.lr.ph.split.split.us, label %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+
+for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge: ; preds = %for.body3.lr.ph.split
+  br label %for.body3.lr.ph.split.split
+
+for.body3.lr.ph.split.split.us:                   ; preds = %for.body3.lr.ph.split
+  br label %for.body3.us3
+
+for.body3.us3:                                    ; preds = %for.cond2.loopexit.us11, %for.body3.lr.ph.split.split.us
+  br i1 false, label %cond.false.us4, label %cond.end.us5
+
+cond.false.us4:                                   ; preds = %for.body3.us3
+  br label %cond.end.us5
+
+cond.end.us5:                                     ; preds = %cond.false.us4, %for.body3.us3
+  %cond.us6 = phi i32 [ %div, %cond.false.us4 ], [ %conv7, %for.body3.us3 ]
+  %8 = load i32* @b, align 4
+  %cmp91.us7 = icmp slt i32 %8, 1
+  br i1 %cmp91.us7, label %for.inc.lr.ph.us12, label %for.cond2.loopexit.us11
+
+for.inc.us8:                                      ; preds = %for.inc.lr.ph.us12, %for.inc.us8
+  %9 = phi i32 [ %8, %for.inc.lr.ph.us12 ], [ %inc.us9, %for.inc.us8 ]
+  %inc.us9 = add nsw i32 %9, 1
+  %cmp9.us10 = icmp slt i32 %inc.us9, 1
+  br i1 %cmp9.us10, label %for.inc.us8, label %for.cond8.for.cond2.loopexit_crit_edge.us13
+
+for.cond2.loopexit.us11:                          ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us13, %cond.end.us5
+  br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us, label %for.body3.us3
+
+for.inc.lr.ph.us12:                               ; preds = %cond.end.us5
+  br label %for.inc.us8
+
+for.cond8.for.cond2.loopexit_crit_edge.us13:      ; preds = %for.inc.us8
+  %inc.lcssa.us14 = phi i32 [ %inc.us9, %for.inc.us8 ]
+  store i32 %inc.lcssa.us14, i32* @b, align 4
+  br label %for.cond2.loopexit.us11
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us: ; preds = %for.cond2.loopexit.us11
+  %cond.lcssa.ph.ph.us = phi i32 [ %cond.us6, %for.cond2.loopexit.us11 ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.body3.lr.ph.split.split:                      ; preds = %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+  br label %for.body3
+
+for.cond8.for.cond2.loopexit_crit_edge:           ; preds = %for.inc
+  %inc.lcssa = phi i32 [ %inc, %for.inc ]
+  store i32 %inc.lcssa, i32* @b, align 4
+  br label %for.cond2.loopexit
+
+for.cond2.loopexit:                               ; preds = %cond.end, %for.cond8.for.cond2.loopexit_crit_edge
+  br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, label %for.body3
+
+for.body3:                                        ; preds = %for.cond2.loopexit, %for.body3.lr.ph.split.split
+  br i1 false, label %cond.false, label %cond.end
+
+cond.false:                                       ; preds = %for.body3
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %for.body3
+  %cond = phi i32 [ %div, %cond.false ], [ %conv7, %for.body3 ]
+  %10 = load i32* @b, align 4
+  %cmp91 = icmp slt i32 %10, 1
+  br i1 %cmp91, label %for.inc.lr.ph, label %for.cond2.loopexit
+
+for.inc.lr.ph:                                    ; preds = %cond.end
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.inc.lr.ph
+  %11 = phi i32 [ %10, %for.inc.lr.ph ], [ %inc, %for.inc ]
+  %inc = add nsw i32 %11, 1
+  %cmp9 = icmp slt i32 %inc, 1
+  br i1 %cmp9, label %for.inc, label %for.cond8.for.cond2.loopexit_crit_edge
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa:  ; preds = %for.cond2.loopexit
+  %cond.lcssa.ph.ph = phi i32 [ %cond, %for.cond2.loopexit ]
+  br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.cond2.for.inc13_crit_edge.us-lcssa:           ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us
+  %cond.lcssa.ph = phi i32 [ %cond.lcssa.ph.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa ], [ %cond.lcssa.ph.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us ]
+  br label %for.cond2.for.inc13_crit_edge
+
+for.cond2.for.inc13_crit_edge:                    ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us
+  %cond.lcssa = phi i32 [ %cond.lcssa.ph, %for.cond2.for.inc13_crit_edge.us-lcssa ], [ %cond.lcssa.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us ]
+  store i32 %cond.lcssa, i32* @c, align 4
+  br label %for.inc13
+
+; CHECK: [[for_inc13]]:
+; CHECK-NEXT: %[[indvars_iv_next]] = add nuw nsw i32 %[[indvars_iv]], 1
+; CHECK-NEXT: %[[exitcond4:.*]] = icmp ne i32 %[[indvars_iv]], -1
+; CHECK-NEXT: br i1 %[[exitcond4]], label %[[for_cond2_preheader]], label %[[for_end15:.*]]
+for.inc13:                                        ; preds = %for.cond2.for.inc13_crit_edge, %for.cond2.preheader
+  %inc14 = add i8 %storemerge15, 1
+  %cmp = icmp ugt i8 %inc14, 50
+  br i1 %cmp, label %for.cond2.preheader, label %for.end15
+
+; CHECK: [[for_end15]]:
+; CHECK-NEXT: ret void
+for.end15:                                        ; preds = %for.inc13
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/sharpen-range.ll b/test/Transforms/IndVarSimplify/sharpen-range.ll
new file mode 100644
index 0000000..6a9d352
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/sharpen-range.ll
@@ -0,0 +1,113 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if llvm can narrow !range metadata based on loop entry
+;; predicates.
+
+declare void @abort()
+
+define i1 @bounded_below_slt(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_slt
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+define i1 @bounded_below_sle(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_sle
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp sle i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp sle i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+;; Assert that we're not making an incorrect transform.
+
+declare i32 @check(i8*)
+
+define void @NoChange() {
+; CHECK-LABEL: NoChange
+entry:
+  br label %loop.begin
+
+loop.begin:
+; CHECK: loop.begin:
+  %i.01 = phi i64 [ 2, %entry ], [ %add, %loop.end ]
+  %cmp = icmp ugt i64 %i.01, 1
+; CHECK: %cmp = icmp ugt i64 %i.01, 1
+  br i1 %cmp, label %loop, label %loop.end
+
+loop:
+; CHECK: loop
+  %.sum = add i64 %i.01, -2
+  %v = getelementptr inbounds i8* null, i64 %.sum
+  %r = tail call i32 @check(i8* %v)
+  %c = icmp eq i32 %r, 0
+  br i1 %c, label %loop.end, label %abort.now
+
+abort.now:
+  tail call void @abort()
+  unreachable
+
+loop.end:
+  %add = add i64 %i.01, -1
+  %eq = icmp eq i64 %add, 0
+  br i1 %eq, label %exit, label %loop.begin
+
+exit:
+  ret void
+}
+
+!0 = metadata !{i32 0, i32 100}
diff --git a/test/Transforms/IndVarSimplify/use-range-metadata.ll b/test/Transforms/IndVarSimplify/use-range-metadata.ll
new file mode 100644
index 0000000..7ac4f11
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/use-range-metadata.ll
@@ -0,0 +1,37 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if IndVarSimplify understands !range metadata.
+
+declare void @abort()
+
+define i1 @iterate(i32* nocapture readonly %buffer) {
+entry:
+  %length = load i32* %buffer, !range !0
+  br label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+!0 = metadata !{i32 1, i32 100}
diff --git a/test/Transforms/IndVarSimplify/verify-scev.ll b/test/Transforms/IndVarSimplify/verify-scev.ll
index 019f583..b9ce3d6 100644
--- a/test/Transforms/IndVarSimplify/verify-scev.ll
+++ b/test/Transforms/IndVarSimplify/verify-scev.ll
@@ -380,11 +380,11 @@ for.body48:                                       ; preds = %for.inc221, %for.bo
 
 for.body65.lr.ph:                                 ; preds = %for.body48
   %0 = load i32* undef, align 4
+  %1 = sext i32 %0 to i64
   br label %for.body65.us
 
 for.body65.us:                                    ; preds = %for.inc219.us, %for.body65.lr.ph
-  %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
-  %idxprom66.us = sext i32 %k.09.us to i64
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
   br i1 undef, label %for.inc219.us, label %if.end72.us
 
 if.end72.us:                                      ; preds = %for.body65.us
@@ -406,8 +406,8 @@ for.cond152.us:                                   ; preds = %for.cond152.us, %fo
   br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
 
 for.inc219.us:                                    ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
-  %inc.us = add nsw i32 %k.09.us, 1
-  %cmp64.us = icmp sgt i32 %inc.us, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp64.us = icmp sgt i64 %indvars.iv.next, %1
   br i1 %cmp64.us, label %for.inc221, label %for.body65.us
 
 for.cond139.loopexit.us:                          ; preds = %for.cond152.us
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
new file mode 100644
index 0000000..0930a0c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+; Check the loop exit i32 compare instruction and operand are widened to i64
+; instead of truncating IV before its use in the i32 compare instruction.
+
+@idx = common global i32 0, align 4
+@e = common global i32 0, align 4
+@ptr = common global i32* null, align 8
+
+; CHECK-LABEL: @test1
+; CHECK: for.body.lr.ph:
+; CHECK: sext i32
+; CHECK: for.cond:
+; CHECK: icmp slt i64
+; CHECK: for.body:
+; CHECK: phi i64
+
+define i32 @test1() {
+entry:
+  store i32 -1, i32* @idx, align 4
+  %0 = load i32* @e, align 4
+  %cmp4 = icmp slt i32 %0, 0
+  br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %1 = load i32** @ptr, align 8
+  %2 = load i32* @e, align 4
+  br label %for.body
+
+for.cond:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, %2
+  br i1 %cmp, label %for.body, label %for.cond.for.end.loopexit_crit_edge
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
+  %idxprom = sext i32 %i.05 to i64
+  %arrayidx = getelementptr inbounds i32* %1, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %3, 0
+  br i1 %tobool, label %if.then, label %for.cond
+
+if.then:
+  %i.05.lcssa = phi i32 [ %i.05, %for.body ]
+  store i32 %i.05.lcssa, i32* @idx, align 4
+  br label %for.end
+
+for.cond.for.end.loopexit_crit_edge:
+  br label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %4 = load i32* @idx, align 4
+  ret i32 %4
+}
+
+; CHECK-LABEL: @test2
+; CHECK: for.body4.us
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %cmp2.us = icmp slt i64
+; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
+; CHECK-NOT: %cmp2.us = icmp slt i32
+
+define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
+entry:
+  %conv = zext i8 %limit to i32
+  br i1 undef, label %for.cond1.preheader, label %for.cond1.preheader.us
+
+for.cond1.preheader.us:
+  %storemerge5.us = phi i32 [ 0, %entry ], [ %inc14.us, %for.inc13.us ]
+  br i1 true, label %for.body4.lr.ph.us, label %for.inc13.us
+
+for.inc13.us:
+  %inc14.us = add nsw i32 %storemerge5.us, 1
+  %cmp.us = icmp slt i32 %inc14.us, 4
+  br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end
+
+for.body4.us:
+  %storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
+  %idxprom.us = sext i32 %storemerge14.us to i64
+  %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
+  %0 = load i8* %arrayidx6.us, align 1
+  %idxprom7.us = zext i8 %0 to i64
+  %arrayidx8.us = getelementptr inbounds i8* %b, i64 %idxprom7.us
+  %1 = load i8* %arrayidx8.us, align 1
+  store i8 %1, i8* %arrayidx6.us, align 1
+  %inc.us = add nsw i32 %storemerge14.us, 1
+  %cmp2.us = icmp slt i32 %inc.us, %conv
+  br i1 %cmp2.us, label %for.body4.us, label %for.inc13.us
+
+for.body4.lr.ph.us:
+  %idxprom5.us = sext i32 %storemerge5.us to i64
+  br label %for.body4.us
+
+for.cond1.preheader:
+  %storemerge5 = phi i32 [ 0, %entry ], [ %inc14, %for.inc13 ]
+  br i1 false, label %for.inc13, label %for.inc13
+
+for.inc13:
+  %inc14 = add nsw i32 %storemerge5, 1
+  %cmp = icmp slt i32 %inc14, 4
+  br i1 %cmp, label %for.cond1.preheader, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test3
+; CHECK: sext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp slt i64
+
+define i32 @test3(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}
+
+declare i32 @fn1(i8 signext)
+
+; PR21030
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: phi i32
+; CHECK: icmp sgt i8
+
+define i32 @test4(i32 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %c.07 = phi i8 [ -3, %entry ], [ %dec, %for.body ]
+  %conv6 = zext i8 %c.07 to i32
+  %or = or i32 %a, %conv6
+  %conv3 = trunc i32 %or to i8
+  %call = call i32 @fn1(i8 signext %conv3)
+  %dec = add i8 %c.07, -1
+  %cmp = icmp sgt i8 %dec, -14
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; CHECK-LABEL: @test5
+; CHECK: zext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp ule i64
+
+define i32 @test5(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ule i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = zext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}