6 files changed, 191 insertions, 181 deletions
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index cc9f9cc..4286ec3 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -334,6 +334,12 @@ namespace llvm {
     /// found.
     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
 
+    /// isNecessaryCond - Test whether the given CondValue value is a condition
+    /// which is at least as strict as the one described by Pred, LHS, and RHS.
+    bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
+                         const SCEV *LHS, const SCEV *RHS,
+                         bool Inverse);
+
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
     /// constant number of times, and the PHI node is just a recurrence
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 436b79d..d1f6679 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2813,7 +2813,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   const SCEV* BECount = CouldNotCompute;
   const SCEV* MaxBECount = CouldNotCompute;
   bool CouldNotComputeBECount = false;
-  bool CouldNotComputeMaxBECount = false;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     BackedgeTakenInfo NewBTI =
       ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@@ -2826,25 +2825,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
     } else if (!CouldNotComputeBECount) {
       if (BECount == CouldNotCompute)
         BECount = NewBTI.Exact;
-      else {
-        // TODO: More analysis could be done here. For example, a
-        // loop with a short-circuiting && operator has an exact count
-        // of the min of both sides.
-        CouldNotComputeBECount = true;
-        BECount = CouldNotCompute;
-      }
-    }
-    if (NewBTI.Max == CouldNotCompute) {
-      // We couldn't compute an maximum value for this exit, so
-      // we won't be able to compute an maximum value for the loop.
-      CouldNotComputeMaxBECount = true;
-      MaxBECount = CouldNotCompute;
-    } else if (!CouldNotComputeMaxBECount) {
-      if (MaxBECount == CouldNotCompute)
-        MaxBECount = NewBTI.Max;
       else
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
     }
+    if (MaxBECount == CouldNotCompute)
+      MaxBECount = NewBTI.Max;
+    else if (NewBTI.Max != CouldNotCompute)
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
   }
 
   return BackedgeTakenInfo(BECount, MaxBECount);
@@ -2925,9 +2912,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                        Value *ExitCond,
                                                        BasicBlock *TBB,
                                                        BasicBlock *FBB) {
-  // Check if the controlling expression for this loop is an and or or. In
-  // such cases, an exact backedge-taken count may be infeasible, but a
-  // maximum count may still be feasible.
+  // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
@@ -3899,88 +3884,111 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
         LoopEntryPredicate->isUnconditional())
       continue;
 
-    ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
-    if (!ICI) continue;
+    if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+                        LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+      return true;
+  }
 
-    // Now that we found a conditional branch that dominates the loop, check to
-    // see if it is the comparison we are looking for.
-    Value *PreCondLHS = ICI->getOperand(0);
-    Value *PreCondRHS = ICI->getOperand(1);
-    ICmpInst::Predicate Cond;
-    if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
-      Cond = ICI->getPredicate();
-    else
-      Cond = ICI->getInversePredicate();
+  return false;
+}
 
-    if (Cond == Pred)
-      ; // An exact match.
-    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
-      ; // The actual condition is beyond sufficient.
-    else
-      // Check a few special cases.
-      switch (Cond) {
-      case ICmpInst::ICMP_UGT:
-        if (Pred == ICmpInst::ICMP_ULT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_ULT;
-          break;
-        }
-        continue;
-      case ICmpInst::ICMP_SGT:
-        if (Pred == ICmpInst::ICMP_SLT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_SLT;
+/// isNecessaryCond - Test whether the given CondValue value is a condition
+/// which is at least as strict as the one described by Pred, LHS, and RHS.
+bool ScalarEvolution::isNecessaryCond(Value *CondValue,
+                                      ICmpInst::Predicate Pred,
+                                      const SCEV *LHS, const SCEV *RHS,
+                                      bool Inverse) {
+  // Recursivly handle And and Or conditions.
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+    if (BO->getOpcode() == Instruction::And) {
+      if (!Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    } else if (BO->getOpcode() == Instruction::Or) {
+      if (Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    }
+  }
+
+  ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+  if (!ICI) return false;
+
+  // Now that we found a conditional branch that dominates the loop, check to
+  // see if it is the comparison we are looking for.
+  Value *PreCondLHS = ICI->getOperand(0);
+  Value *PreCondRHS = ICI->getOperand(1);
+  ICmpInst::Predicate Cond;
+  if (Inverse)
+    Cond = ICI->getInversePredicate();
+  else
+    Cond = ICI->getPredicate();
+
+  if (Cond == Pred)
+    ; // An exact match.
+  else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+    ; // The actual condition is beyond sufficient.
+  else
+    // Check a few special cases.
+    switch (Cond) {
+    case ICmpInst::ICMP_UGT:
+      if (Pred == ICmpInst::ICMP_ULT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_ULT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_SGT:
+      if (Pred == ICmpInst::ICMP_SLT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_SLT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_NE:
+      // Expressions like (x >u 0) are often canonicalized to (x != 0),
+      // so check for this case by checking if the NE is comparing against
+      // a minimum or maximum constant.
+      if (!ICmpInst::isTrueWhenEqual(Pred))
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+          const APInt &A = CI->getValue();
+          switch (Pred) {
+          case ICmpInst::ICMP_SLT:
+            if (A.isMaxSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_SGT:
+            if (A.isMinSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_ULT:
+            if (A.isMaxValue()) break;
+            return false;
+          case ICmpInst::ICMP_UGT:
+            if (A.isMinValue()) break;
+            return false;
+          default:
+            return false;
+          }
+          Cond = ICmpInst::ICMP_NE;
+          // NE is symmetric but the original comparison may not be. Swap
+          // the operands if necessary so that they match below.
+          if (isa<SCEVConstant>(LHS))
+            std::swap(PreCondLHS, PreCondRHS);
           break;
         }
-        continue;
-      case ICmpInst::ICMP_NE:
-        // Expressions like (x >u 0) are often canonicalized to (x != 0),
-        // so check for this case by checking if the NE is comparing against
-        // a minimum or maximum constant.
-        if (!ICmpInst::isTrueWhenEqual(Pred))
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
-            const APInt &A = CI->getValue();
-            switch (Pred) {
-            case ICmpInst::ICMP_SLT:
-              if (A.isMaxSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_SGT:
-              if (A.isMinSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_ULT:
-              if (A.isMaxValue()) break;
-              continue;
-            case ICmpInst::ICMP_UGT:
-              if (A.isMinValue()) break;
-              continue;
-            default:
-              continue;
-            }
-            Cond = ICmpInst::ICMP_NE;
-            // NE is symmetric but the original comparison may not be. Swap
-            // the operands if necessary so that they match below.
-            if (isa<SCEVConstant>(LHS))
-              std::swap(PreCondLHS, PreCondRHS);
-            break;
-          }
-        continue;
-      default:
-        // We weren't able to reconcile the condition.
-        continue;
-      }
-
-    if (!PreCondLHS->getType()->isInteger()) continue;
+      return false;
+    default:
+      // We weren't able to reconcile the condition.
+      return false;
+    }
 
-    const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
-    const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
-    if ((HasSameValue(LHS, PreCondLHSSCEV) &&
-         HasSameValue(RHS, PreCondRHSSCEV)) ||
-        (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
-         HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
-      return true;
-  }
+  if (!PreCondLHS->getType()->isInteger()) return false;
 
-  return false;
+  const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
+  const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
+  return (HasSameValue(LHS, PreCondLHSSCEV) &&
+          HasSameValue(RHS, PreCondRHSSCEV)) ||
+         (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+          HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
 }
 
 /// getBECount - Subtract the end and start values and divide by the step,
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c5591d7..6d7abc0 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
   if (Argument *A = dyn_cast<Argument>(V)) {
     // Check to see if there is already a cast!
     for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
-         UI != E; ++UI) {
+         UI != E; ++UI)
       if ((*UI)->getType() == Ty)
         if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
           if (CI->getOpcode() == opcode) {
             // If the cast isn't the first instruction of the function, move it.
-            if (BasicBlock::iterator(CI) != 
+            if (BasicBlock::iterator(CI) !=
                 A->getParent()->getEntryBlock().begin()) {
-              // If the CastInst is the insert point, change the insert point.
-              if (CI == InsertPt) ++InsertPt;
-              // Splice the cast at the beginning of the entry block.
-              CI->moveBefore(A->getParent()->getEntryBlock().begin());
+              // Recreate the cast at the beginning of the entry block.
+              // The old cast is left in place in case it is being used
+              // as an insert point.
+              Instruction *NewCI =
+                CastInst::Create(opcode, V, Ty, "",
+                                 A->getParent()->getEntryBlock().begin());
+              NewCI->takeName(CI);
+              CI->replaceAllUsesWith(NewCI);
+              return NewCI;
             }
             return CI;
           }
-    }
+
     Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
     InsertedValues.insert(I);
@@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
             It = cast<InvokeInst>(I)->getNormalDest()->begin();
           while (isa<PHINode>(It)) ++It;
           if (It != BasicBlock::iterator(CI)) {
-            // If the CastInst is the insert point, change the insert point.
-            if (CI == InsertPt) ++InsertPt;
-            // Splice the cast immediately after the operand in question.
-            CI->moveBefore(It);
+            // Recreate the cast at the beginning of the entry block.
+            // The old cast is left in place in case it is being used
+            // as an insert point.
+            Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
+            NewCI->takeName(CI);
+            CI->replaceAllUsesWith(NewCI);
+            return NewCI;
           }
           return CI;
         }
@@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       }
     }
 
-    Value *RestV = expand(Rest);
-    return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+    // Just do a normal add. Pre-expand the operands to suppress folding.
+    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+                                SE.getUnknown(expand(Rest))));
   }
 
   // {0,+,1} --> Insert a canonical induction variable into the loop!
@@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
              getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
-  if (S->isAffine()) {   // {0,+,F} --> i*F
-    Value *F = expandCodeFor(S->getOperand(1), Ty);
-
-    // If the insert point is directly inside of the loop, emit the multiply at
-    // the insert point.  Otherwise, L is a loop that is a parent of the insert
-    // point loop.  If we can, move the multiply to the outer most loop that it
-    // is safe to be in.
-    BasicBlock::iterator MulInsertPt = getInsertionPoint();
-    Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
-    if (InsertPtLoop != L && InsertPtLoop &&
-        L->contains(InsertPtLoop->getHeader())) {
-      do {
-        // If we cannot hoist the multiply out of this loop, don't.
-        if (!InsertPtLoop->isLoopInvariant(F)) break;
-
-        BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
-
-        // If this loop hasn't got a preheader, we aren't able to hoist the
-        // multiply.
-        if (!InsertPtLoopPH)
-          break;
-
-        // Otherwise, move the insert point to the preheader.
-        MulInsertPt = InsertPtLoopPH->getTerminator();
-        InsertPtLoop = InsertPtLoop->getParentLoop();
-      } while (InsertPtLoop != L);
-    }
-    
-    return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
-  }
+  if (S->isAffine())    // {0,+,F} --> i*F
+    return
+      expand(SE.getTruncateOrNoop(
+        SE.getMulExpr(SE.getUnknown(I),
+                      SE.getNoopOrAnyExtend(S->getOperand(1),
+                                            I->getType())),
+        Ty));
 
   // If this is a chain of recurrences, turn it into a closed form, using the
   // folders, then expandCodeFor the closed form.  This allows the folders to
@@ -671,8 +657,31 @@ Value *SCEVExpander::expand(const SCEV *S) {
     InsertedExpressions.find(S);
   if (I != InsertedExpressions.end())
     return I->second;
-  
+
+  // Compute an insertion point for this SCEV object. Hoist the instructions
+  // as far out in the loop nest as possible.
+  BasicBlock::iterator InsertPt = getInsertionPoint();
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+  for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
+       L = L->getParentLoop())
+    if (S->isLoopInvariant(L)) {
+      if (!L) break;
+      if (BasicBlock *Preheader = L->getLoopPreheader())
+        InsertPt = Preheader->getTerminator();
+    } else {
+      // If the SCEV is computable at this level, insert it into the header
+      // after the PHIs (and after any other instructions that we've inserted
+      // there) so that it is guaranteed to dominate any user inside the loop.
+      if (L && S->hasComputableLoopEvolution(L))
+        InsertPt = L->getHeader()->getFirstNonPHI();
+      while (isInsertedInstruction(InsertPt)) ++InsertPt;
+      break;
+    }
+  setInsertionPoint(InsertPt);
+
   Value *V = visit(S);
+
+  setInsertionPoint(SaveInsertPt);
   InsertedExpressions[S] = V;
   return V;
 }
@@ -686,6 +695,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
   assert(Ty->isInteger() && "Can only insert integer induction variables!");
   const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
-                                  SE.getIntegerSCEV(1, Ty), L);
-  return expand(H);
+                                   SE.getIntegerSCEV(1, Ty), L);
+  BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+  Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+  setInsertionPoint(SaveInsertPt);
+  return V;
 }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6c20e7d..658b788 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -104,7 +104,8 @@ namespace {
     void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
 
     void RewriteIVExpressions(Loop *L, const Type *LargestType,
-                              SCEVExpander &Rewriter);
+                              SCEVExpander &Rewriter,
+                              BasicBlock::iterator InsertPt);
 
     void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
 
@@ -170,6 +171,8 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   }
 
   // Expand the code for the iteration count into the preheader of the loop.
+  assert(RHS->isLoopInvariant(L) &&
+         "Computed iteration count is not loop invariant!");
   BasicBlock *Preheader = L->getLoopPreheader();
   Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
                                           Preheader->getTerminator());
@@ -434,10 +437,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                           ExitingBlock, BI, Rewriter);
   }
 
-  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
+  BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
 
   // Rewrite IV-derived expressions. Clears the rewriter cache.
-  RewriteIVExpressions(L, LargestType, Rewriter);
+  RewriteIVExpressions(L, LargestType, Rewriter, InsertPt);
 
   // The Rewriter may only be used for isInsertedInstruction queries from this
   // point on.
@@ -462,7 +465,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 }
 
 void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
-                                          SCEVExpander &Rewriter) {
+                                          SCEVExpander &Rewriter,
+                                          BasicBlock::iterator InsertPt) {
   SmallVector<WeakVH, 16> DeadInsts;
 
   // Rewrite all induction variable expressions in terms of the canonical
@@ -488,29 +492,17 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
       // Compute the final addrec to expand into code.
       const SCEV* AR = IU->getReplacementExpr(*UI);
 
-      Value *NewVal = 0;
-      if (AR->isLoopInvariant(L)) {
-        BasicBlock::iterator I = Rewriter.getInsertionPoint();
-        // Expand loop-invariant values in the loop preheader. They will
-        // be sunk to the exit block later, if possible.
-        NewVal =
-          Rewriter.expandCodeFor(AR, UseTy,
-                                 L->getLoopPreheader()->getTerminator());
-        Rewriter.setInsertionPoint(I);
-        ++NumReplaced;
-      } else {
-        // FIXME: It is an extremely bad idea to indvar substitute anything more
-        // complex than affine induction variables.  Doing so will put expensive
-        // polynomial evaluations inside of the loop, and the str reduction pass
-        // currently can only reduce affine polynomials.  For now just disable
-        // indvar subst on anything more complex than an affine addrec, unless
-        // it can be expanded to a trivial value.
-        if (!Stride->isLoopInvariant(L))
-          continue;
-
-        // Now expand it into actual Instructions and patch it into place.
-        NewVal = Rewriter.expandCodeFor(AR, UseTy);
-      }
+      // FIXME: It is an extremely bad idea to indvar substitute anything more
+      // complex than affine induction variables.  Doing so will put expensive
+      // polynomial evaluations inside of the loop, and the str reduction pass
+      // currently can only reduce affine polynomials.  For now just disable
+      // indvar subst on anything more complex than an affine addrec, unless
+      // it can be expanded to a trivial value.
+      if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+        continue;
+
+      // Now expand it into actual Instructions and patch it into place.
+      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
 
       // Patch the new value into place.
       if (Op->hasName())
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 312b957..a877c4e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
 
   const SCEV* NewValSCEV = SE->getUnknown(Base);
 
-  // If there is no immediate value, skip the next part.
-  if (!Imm->isZero()) {
-    // If we are inserting the base and imm values in the same block, make sure
-    // to adjust the IP position if insertion reused a result.
-    if (IP == BaseInsertPt)
-      IP = Rewriter.getInsertionPoint();
-
-    // Always emit the immediate (if non-zero) into the same block as the user.
-    NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
-  }
+  // Always emit the immediate into the same block as the user.
+  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
 
   return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
 }
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index 62382c6..ca6204c 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
 ; PR3495
 ; The loop reversal kicks in once here, resulting in one fewer instruction.