SCEV should use NSW to get trip count for positive nonunit stride loops.

SCEV currently fails to compute loop counts for nonunit stride loops. This comes up frequently. It prevents loop optimization and forces vectorization to insert extra loop checks. For example: void foo(int n, int *x) { for (int i = 0; i < n; i += 3) { x[i] = i; x[i+1] = i+1; x[i+2] = i+2; } } We need to properly handle the case in which limit > INT_MAX-stride. In the above case: n > INT_MAX-3. In this case the loop counter will step beyond the limit and overflow at the same time. However, knowing that signed integer overflow is undefined, we can assume the loop test behavior is arbitrary after overflow. This obeys both C undefined behavior rules, and the more strict LLVM poison value rules. I'm finally fixing this in response to Hal Finkel's persistence. The most probable reason that we never optimized this before is that we were being careful to handle the case where the developer expected a side-effect-free infinite loop relying on overflow: for (int i = 0; i < n; i += s) { ++j; } return j; If INT_MAX+1 is a multiple of s and n > INT_MAX-s, then we might expect an infinite loop. However there are plenty of ways to achieve this effect without relying on undefined behavior of signed overflow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193015 91177308-0d34-0410-b5e6-96231b3b80d8
author: Andrew Trick <atrick@apple.com> 2013-10-18 23:43:53 +0000
committer: Andrew Trick <atrick@apple.com> 2013-10-18 23:43:53 +0000
commit: a5c5bc994875d34ec465b86c7539edd19dd95f22 (patch)
tree: 8451d37155f21e351653a6691cad46c3055fea5f
parent: 4216b993caf5a5bb24c5308eda32242fd06aaaca (diff)
download: external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.zip
external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.tar.gz
external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.tar.bz2
2 files changed, 46 insertions, 36 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 069c3fc..5047c66 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -6398,13 +6398,6 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
   if (!AddRec || AddRec->getLoop() != L)
     return getCouldNotCompute();
 
-  // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = false;
-  if (!IsSubExpr) {
-    NoWrap = AddRec->getNoWrapFlags(
-      (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
-                          | SCEV::FlagNW));
-  }
   if (AddRec->isAffine()) {
     unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
     const SCEV *Step = AddRec->getStepRecurrence(*this);
@@ -6414,20 +6407,21 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     if (Step->isOne()) {
       // With unit stride, the iteration never steps past the limit value.
     } else if (isKnownPositive(Step)) {
-      // Test whether a positive iteration can step past the limit
-      // value and past the maximum value for its type in a single step.
-      // Note that it's not sufficient to check NoWrap here, because even
-      // though the value after a wrap is undefined, it's not undefined
-      // behavior, so if wrap does occur, the loop could either terminate or
-      // loop infinitely, but in either case, the loop is guaranteed to
-      // iterate at least until the iteration where the wrapping occurs.
+      // Test whether a positive iteration can step past the limit value and
+      // past the maximum value for its type in a single step. The NSW/NUW flags
+      // can imply that stepping past RHS would immediately result in undefined
+      // behavior. No self-wrap is not useful here because the loop counter may
+      // signed or unsigned wrap but continue iterating and terminate with
+      // defined behavior without ever self-wrapping.
       const SCEV *One = getConstant(Step->getType(), 1);
       if (isSigned) {
-        APInt Max = APInt::getSignedMaxValue(BitWidth);
-        if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+        if (!AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+          APInt Max = APInt::getSignedMaxValue(BitWidth);
+          if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
               .slt(getSignedRange(RHS).getSignedMax()))
-          return getCouldNotCompute();
-      } else {
+            return getCouldNotCompute();
+        }
+      } else if (!AddRec->getNoWrapFlags(SCEV::FlagNUW)){
         APInt Max = APInt::getMaxValue(BitWidth);
         if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
               .ult(getUnsignedRange(RHS).getUnsignedMax()))
@@ -6481,6 +6475,15 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                   getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
                                StepMinusOne));
 
+    // If the loop counter does not self-wrap, then the trip count may be
+    // computed by dividing the distance by the step. This is independent of
+    // signed or unsigned wrap.
+    bool NoWrap = false;
+    if (!IsSubExpr) {
+      NoWrap = AddRec->getNoWrapFlags(
+        (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
+                            | SCEV::FlagNW));
+    }
     // Finally, we subtract these two values and divide, rounding up, to get
     // the number of times the backedge is executed.
     const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
diff --git a/test/Analysis/ScalarEvolution/trip-count9.ll b/test/Analysis/ScalarEvolution/trip-count9.ll
index 9180f2b..85d4050 100644
--- a/test/Analysis/ScalarEvolution/trip-count9.ll
+++ b/test/Analysis/ScalarEvolution/trip-count9.ll
@@ -25,8 +25,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -57,8 +57,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -89,8 +89,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -120,12 +120,18 @@ exit:
   ret void
 }
 
-; Be careful with this one. If %n is INT4_MAX, %i.next will wrap. The nsw bit
-; says that the result is undefined, but ScalarEvolution must respect that
-; subsequent passes may result the undefined behavior in predictable ways.
+; If %n is INT4_MAX, %i.next will wrap. The nsw bit says that the
+; result is undefined. Therefore, after the loop's second iteration,
+; we are free to assume that the loop exits. This is valid because:
+; (a) %i.next is a poison value after the second iteration, which can
+; also be considered an undef value.
+; (b) the return instruction enacts a side effect that is control
+; dependent on the poison value.
+;
+; CHECK-LABEL: nsw_step2
 ; CHECK: Determining loop execution counts for: @nsw_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -139,6 +145,7 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: nsw_start1
 ; CHECK: Determining loop execution counts for: @nsw_start1
 ; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n))
 ; CHECK: Loop %loop: max backedge-taken count is 5
@@ -156,8 +163,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -188,8 +195,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 7
 define void @nsw_startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -221,7 +228,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_step2(i4 %n) {
 entry:
@@ -255,7 +262,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_start1_step2(i4 %n) {
 entry:
@@ -289,7 +296,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: max backedge-taken count is 7
 define void @even_startx_step2(i4 %n, i4 %x) {
 entry:
author	Andrew Trick <atrick@apple.com>	2013-10-18 23:43:53 +0000
committer	Andrew Trick <atrick@apple.com>	2013-10-18 23:43:53 +0000
commit	a5c5bc994875d34ec465b86c7539edd19dd95f22 (patch)
tree	8451d37155f21e351653a6691cad46c3055fea5f
parent	4216b993caf5a5bb24c5308eda32242fd06aaaca (diff)
download	external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.zip external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.tar.gz external_llvm-a5c5bc994875d34ec465b86c7539edd19dd95f22.tar.bz2