2 files changed, 46 insertions, 36 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 069c3fc..5047c66 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -6398,13 +6398,6 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
   if (!AddRec || AddRec->getLoop() != L)
     return getCouldNotCompute();
 
-  // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = false;
-  if (!IsSubExpr) {
-    NoWrap = AddRec->getNoWrapFlags(
-      (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
-                          | SCEV::FlagNW));
-  }
   if (AddRec->isAffine()) {
     unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
     const SCEV *Step = AddRec->getStepRecurrence(*this);
@@ -6414,20 +6407,21 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     if (Step->isOne()) {
       // With unit stride, the iteration never steps past the limit value.
     } else if (isKnownPositive(Step)) {
-      // Test whether a positive iteration can step past the limit
-      // value and past the maximum value for its type in a single step.
-      // Note that it's not sufficient to check NoWrap here, because even
-      // though the value after a wrap is undefined, it's not undefined
-      // behavior, so if wrap does occur, the loop could either terminate or
-      // loop infinitely, but in either case, the loop is guaranteed to
-      // iterate at least until the iteration where the wrapping occurs.
+      // Test whether a positive iteration can step past the limit value and
+      // past the maximum value for its type in a single step. The NSW/NUW flags
+      // can imply that stepping past RHS would immediately result in undefined
+      // behavior. No self-wrap is not useful here because the loop counter may
+      // signed or unsigned wrap but continue iterating and terminate with
+      // defined behavior without ever self-wrapping.
       const SCEV *One = getConstant(Step->getType(), 1);
       if (isSigned) {
-        APInt Max = APInt::getSignedMaxValue(BitWidth);
-        if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+        if (!AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+          APInt Max = APInt::getSignedMaxValue(BitWidth);
+          if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
               .slt(getSignedRange(RHS).getSignedMax()))
-          return getCouldNotCompute();
-      } else {
+            return getCouldNotCompute();
+        }
+      } else if (!AddRec->getNoWrapFlags(SCEV::FlagNUW)){
         APInt Max = APInt::getMaxValue(BitWidth);
         if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
               .ult(getUnsignedRange(RHS).getUnsignedMax()))
@@ -6481,6 +6475,15 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                   getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
                                StepMinusOne));
 
+    // If the loop counter does not self-wrap, then the trip count may be
+    // computed by dividing the distance by the step. This is independent of
+    // signed or unsigned wrap.
+    bool NoWrap = false;
+    if (!IsSubExpr) {
+      NoWrap = AddRec->getNoWrapFlags(
+        (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
+                            | SCEV::FlagNW));
+    }
     // Finally, we subtract these two values and divide, rounding up, to get
     // the number of times the backedge is executed.
     const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
diff --git a/test/Analysis/ScalarEvolution/trip-count9.ll b/test/Analysis/ScalarEvolution/trip-count9.ll
index 9180f2b..85d4050 100644
--- a/test/Analysis/ScalarEvolution/trip-count9.ll
+++ b/test/Analysis/ScalarEvolution/trip-count9.ll
@@ -25,8 +25,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -57,8 +57,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -89,8 +89,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -120,12 +120,18 @@ exit:
   ret void
 }
 
-; Be careful with this one. If %n is INT4_MAX, %i.next will wrap. The nsw bit
-; says that the result is undefined, but ScalarEvolution must respect that
-; subsequent passes may result the undefined behavior in predictable ways.
+; If %n is INT4_MAX, %i.next will wrap. The nsw bit says that the
+; result is undefined. Therefore, after the loop's second iteration,
+; we are free to assume that the loop exits. This is valid because:
+; (a) %i.next is a poison value after the second iteration, which can
+; also be considered an undef value.
+; (b) the return instruction enacts a side effect that is control
+; dependent on the poison value.
+;
+; CHECK-LABEL: nsw_step2
 ; CHECK: Determining loop execution counts for: @nsw_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -139,6 +145,7 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: nsw_start1
 ; CHECK: Determining loop execution counts for: @nsw_start1
 ; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n))
 ; CHECK: Loop %loop: max backedge-taken count is 5
@@ -156,8 +163,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -188,8 +195,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 7
 define void @nsw_startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -221,7 +228,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_step2(i4 %n) {
 entry:
@@ -255,7 +262,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_start1_step2(i4 %n) {
 entry:
@@ -289,7 +296,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 7
 define void @even_startx_step2(i4 %n, i4 %x) {
 entry: