 include/llvm/Analysis/IVUsers.h                                |  235
 lib/Analysis/IVUsers.cpp                                        |  391
 lib/Target/README.txt                                           |   10
 lib/Transforms/Scalar/IndVarSimplify.cpp                        |  935
 lib/Transforms/Scalar/LoopStrengthReduce.cpp                    |  569
 test/CodeGen/X86/iv-users-in-other-loops.ll                     |    9
 test/CodeGen/X86/masked-iv-safe.ll                              |    7
 test/CodeGen/X86/subreg-to-reg-5.ll                             |    7
 test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll  |    3
 test/Transforms/IndVarSimplify/ada-loops.ll                     |   90
 test/Transforms/IndVarSimplify/iv-zext.ll                       |   33
 test/Transforms/IndVarSimplify/loop_evaluate_6.ll               |   29
 test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll  |    2
 13 files changed, 1376 insertions, 944 deletions
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
new file mode 100644
index 0000000..36ff07b
--- /dev/null
+++ b/include/llvm/Analysis/IVUsers.h
@@ -0,0 +1,235 @@
+//===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IVUSERS_H
+#define LLVM_ANALYSIS_IVUSERS_H
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+
+namespace llvm {
+
+class DominatorTree;
+class Instruction;
+class Value;
+class IVUsersOfOneStride;
+
+/// IVStrideUse - Keep track of one use of a strided induction variable, where
+/// the stride is stored externally. The Offset member keeps track of the
+/// offset from the IV, User is the actual user of the operand, and
+/// 'OperandValToReplace' is the operand of the User that is the use.
+class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
+public:
+ IVStrideUse(IVUsersOfOneStride *parent,
+ const SCEVHandle &offset,
+ Instruction* U, Value *O, bool issigned)
+ : CallbackVH(U), Parent(parent), Offset(offset),
+ OperandValToReplace(O), IsSigned(issigned),
+ IsUseOfPostIncrementedValue(false) {
+ }
+
+ /// getUser - Return the user instruction for this use.
+ Instruction *getUser() const {
+ return cast<Instruction>(getValPtr());
+ }
+
+ /// setUser - Assign a new user instruction for this use.
+ void setUser(Instruction *NewUser) {
+ setValPtr(NewUser);
+ }
+
+ /// getParent - Return a pointer to the IVUsersOfOneStride that owns
+ /// this IVStrideUse.
+ IVUsersOfOneStride *getParent() const { return Parent; }
+
+  /// getOffset - Return the offset to add to a theoretical induction
+  /// variable that starts at zero and counts up by the stride to compute
+  /// the value for the use. This always has the same type as the stride,
+  /// which may need to be cast to match the type of the use.
+ SCEVHandle getOffset() const { return Offset; }
+
+ /// setOffset - Assign a new offset to this use.
+ void setOffset(SCEVHandle Val) {
+ Offset = Val;
+ }
+
+ /// getOperandValToReplace - Return the Value of the operand in the user
+ /// instruction that this IVStrideUse is representing.
+ Value *getOperandValToReplace() const {
+ return OperandValToReplace;
+ }
+
+ /// setOperandValToReplace - Assign a new Value as the operand value
+ /// to replace.
+ void setOperandValToReplace(Value *Op) {
+ OperandValToReplace = Op;
+ }
+
+ /// isSigned - The stride (and thus also the Offset) of this use may be in
+ /// a narrower type than the use itself (OperandValToReplace->getType()).
+ /// When this is the case, isSigned() indicates whether the IV expression
+  /// should be sign-extended instead of zero-extended to fit the type of
+ /// the use.
+ bool isSigned() const { return IsSigned; }
+
+ /// isUseOfPostIncrementedValue - True if this should use the
+ /// post-incremented version of this IV, not the preincremented version.
+ /// This can only be set in special cases, such as the terminating setcc
+ /// instruction for a loop or uses dominated by the loop.
+ bool isUseOfPostIncrementedValue() const {
+ return IsUseOfPostIncrementedValue;
+ }
+
+  /// setIsUseOfPostIncrementedValue - Set the flag that indicates whether
+  /// this is a post-increment use.
+ void setIsUseOfPostIncrementedValue(bool Val) {
+ IsUseOfPostIncrementedValue = Val;
+ }
+
+private:
+ /// Parent - a pointer to the IVUsersOfOneStride that owns this IVStrideUse.
+ IVUsersOfOneStride *Parent;
+
+ /// Offset - The offset to add to the base induction expression.
+ SCEVHandle Offset;
+
+ /// OperandValToReplace - The Value of the operand in the user instruction
+ /// that this IVStrideUse is representing.
+ WeakVH OperandValToReplace;
+
+ /// IsSigned - Determines whether the replacement value is sign or
+ /// zero extended to the type of the use.
+ bool IsSigned;
+
+ /// IsUseOfPostIncrementedValue - True if this should use the
+ /// post-incremented version of this IV, not the preincremented version.
+ bool IsUseOfPostIncrementedValue;
+
+ /// Deleted - Implementation of CallbackVH virtual function to
+  /// receive notification when the User is deleted.
+ virtual void deleted();
+};
+
+template<> struct ilist_traits<IVStrideUse>
+ : public ilist_default_traits<IVStrideUse> {
+ // createSentinel is used to get hold of a node that marks the end of
+ // the list...
+ // The sentinel is relative to this instance, so we use a non-static
+ // method.
+ IVStrideUse *createSentinel() const {
+ // since i(p)lists always publicly derive from the corresponding
+ // traits, placing a data member in this class will augment i(p)list.
+ // But since the NodeTy is expected to publicly derive from
+ // ilist_node<NodeTy>, there is a legal viable downcast from it
+ // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
+ // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
+ // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
+ // the superposition.
+ return static_cast<IVStrideUse*>(&Sentinel);
+ }
+ static void destroySentinel(IVStrideUse*) {}
+
+ IVStrideUse *provideInitialHead() const { return createSentinel(); }
+ IVStrideUse *ensureHead(IVStrideUse*) const { return createSentinel(); }
+ static void noteHead(IVStrideUse*, IVStrideUse*) {}
+
+private:
+ mutable ilist_node<IVStrideUse> Sentinel;
+};
+
+/// IVUsersOfOneStride - This structure keeps track of all instructions that
+/// have an operand that is based on the trip count multiplied by some stride.
+struct IVUsersOfOneStride : public ilist_node<IVUsersOfOneStride> {
+private:
+ IVUsersOfOneStride(const IVUsersOfOneStride &I); // do not implement
+ void operator=(const IVUsersOfOneStride &I); // do not implement
+
+public:
+ IVUsersOfOneStride() : Stride(0) {}
+
+ explicit IVUsersOfOneStride(const SCEV *stride) : Stride(stride) {}
+
+ /// Stride - The stride for all the contained IVStrideUses. This is
+ /// a constant for affine strides.
+ const SCEV *Stride;
+
+ /// Users - Keep track of all of the users of this stride as well as the
+ /// initial value and the operand that uses the IV.
+ ilist<IVStrideUse> Users;
+
+  void addUser(const SCEVHandle &Offset, Instruction *User, Value *Operand,
+ bool isSigned) {
+ Users.push_back(new IVStrideUse(this, Offset, User, Operand, isSigned));
+ }
+};
+
+class IVUsers : public LoopPass {
+ friend class IVStrideUserVH;
+ Loop *L;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ SmallPtrSet<Instruction*,16> Processed;
+
+public:
+ /// IVUses - A list of all tracked IV uses of induction variable expressions
+ /// we are interested in.
+ ilist<IVUsersOfOneStride> IVUses;
+
+ /// IVUsesByStride - A mapping from the strides in StrideOrder to the
+ /// uses in IVUses.
+ std::map<SCEVHandle, IVUsersOfOneStride*> IVUsesByStride;
+
+ /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
+ /// We use this to iterate over the IVUsesByStride collection without being
+ /// dependent on random ordering of pointers in the process.
+ SmallVector<SCEVHandle, 16> StrideOrder;
+
+private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void releaseMemory();
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ IVUsers();
+
+ /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a
+ /// reducible SCEV, recursively add its users to the IVUsesByStride set and
+ /// return true. Otherwise, return false.
+ bool AddUsersIfInteresting(Instruction *I);
+
+ /// getReplacementExpr - Return a SCEV expression which computes the
+ /// value of the OperandValToReplace of the given IVStrideUse.
+ SCEVHandle getReplacementExpr(const IVStrideUse &U) const;
+
+ void print(raw_ostream &OS, const Module* = 0) const;
+ virtual void print(std::ostream &OS, const Module* = 0) const;
+ void print(std::ostream *OS, const Module* M = 0) const {
+ if (OS) print(*OS, M);
+ }
+
+ /// dump - This method is used for debugging.
+ void dump() const;
+};
+
+Pass *createIVUsersPass();
+
+}
+
+#endif
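
For orientation, here is a minimal sketch (not part of the patch) of how a client loop pass might walk the uses this analysis collects, visiting strides in the deterministic StrideOrder. It relies only on the interface declared above; the helper function name is illustrative.

// Sketch: iterate all recorded IV uses, grouped by stride.
#include "llvm/Analysis/IVUsers.h"
#include <cassert>
using namespace llvm;

static void walkIVUsers(IVUsers &IU) {
  for (unsigned i = 0, e = IU.StrideOrder.size(); i != e; ++i) {
    std::map<SCEVHandle, IVUsersOfOneStride*>::iterator SI =
      IU.IVUsesByStride.find(IU.StrideOrder[i]);
    assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
         E = SI->second->Users.end(); UI != E; ++UI) {
      Instruction *User = UI->getUser();         // instruction using the IV expression
      Value *Op = UI->getOperandValToReplace();  // the IV-derived operand within it
      SCEVHandle Offset = UI->getOffset();       // offset from a zero-based IV of this stride
      bool PostInc = UI->isUseOfPostIncrementedValue();
      (void)User; (void)Op; (void)Offset; (void)PostInc;
    }
  }
}
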
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
new file mode 100644
index 0000000..9ec9cac
--- /dev/null
+++ b/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,391 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+static RegisterPass<IVUsers>
+X("iv-users", "Induction Variable Users", false, true);
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// of L is OK, but not an inner loop nor a disjoint loop.
+static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
+ // This is very common, put it first.
+ if (isa<SCEVConstant>(S))
+ return false;
+ if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (unsigned int i=0; i< AE->getNumOperands(); i++)
+ if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
+ return true;
+ return false;
+ }
+ if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (const Loop *newLoop = AE->getLoop()) {
+ if (newLoop == L)
+ return false;
+ // if newLoop is an outer loop of L, this is OK.
+ if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ return false;
+ }
+ return true;
+ }
+ if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#if 0
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // need this when it is.
+ if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#endif
+ if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
+ return containsAddRecFromDifferentLoop(CE->getOperand(), L);
+ return false;
+}
+
+/// getSCEVStartAndStride - Compute the start and stride of this expression,
+/// returning false if the expression is not a start/stride pair, or true if it
+/// is. The stride must be a loop invariant expression, but the start may be
+/// a mix of loop invariant and loop variant expressions. The start cannot,
+/// however, contain an AddRec from a different loop, unless that loop is an
+/// outer loop of the current loop.
+static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
+ SCEVHandle &Start, SCEVHandle &Stride,
+ bool &isSigned,
+ ScalarEvolution *SE, DominatorTree *DT) {
+ SCEVHandle TheAddRec = Start; // Initialize to zero.
+ bool isSExt = false;
+ bool isZExt = false;
+
+ // If the outer level is an AddExpr, the operands are all start values except
+ // for a nested AddRecExpr.
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
+ for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
+ if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
+ if (AddRec->getLoop() == L)
+ TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
+ else
+ return false; // Nested IV of some sort?
+ } else {
+ Start = SE->getAddExpr(Start, AE->getOperand(i));
+ }
+
+ } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) {
+ TheAddRec = Z->getOperand();
+ isZExt = true;
+ } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) {
+ TheAddRec = S->getOperand();
+ isSExt = true;
+ } else if (isa<SCEVAddRecExpr>(SH)) {
+ TheAddRec = SH;
+ } else {
+ return false; // not analyzable.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
+ if (!AddRec || AddRec->getLoop() != L) return false;
+
+ // Use getSCEVAtScope to attempt to simplify other loops out of
+ // the picture.
+ SCEVHandle AddRecStart = AddRec->getStart();
+ SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
+ if (!isa<SCEVCouldNotCompute>(BetterAddRecStart))
+ AddRecStart = BetterAddRecStart;
+
+ // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
+ // than an outer loop of the current loop, reject it. LSR has no concept of
+ // operating on more than one loop at a time so don't confuse it with such
+ // expressions.
+ if (containsAddRecFromDifferentLoop(AddRecStart, L))
+ return false;
+
+ if (isSExt || isZExt)
+ Start = SE->getTruncateExpr(Start, AddRec->getType());
+
+ Start = SE->getAddExpr(Start, AddRecStart);
+
+ if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) {
+ // If stride is an instruction, make sure it dominates the loop preheader.
+ // Otherwise we could end up with a use before def situation.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT))
+ return false;
+
+ DOUT << "[" << L->getHeader()->getName()
+ << "] Variable stride: " << *AddRec << "\n";
+ }
+
+ Stride = AddRec->getStepRecurrence(*SE);
+ isSigned = isSExt;
+ return true;
+}
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+ Loop *L, LoopInfo *LI, DominatorTree *DT,
+ Pass *P) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User->getParent())) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of IV by the PHI node. If any use corresponds to
+ // a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ unsigned NumUses = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == IV) {
+ ++NumUses;
+ if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+ }
+
+ // Okay, all uses of IV by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUsesByStride set and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+  // LSR is not APInt clean; do not touch integers bigger than 64 bits.
+ if (SE->getTypeSizeInBits(I->getType()) > 64)
+ return false;
+
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ // Get the symbolic expression for this instruction.
+ SCEVHandle ISE = SE->getSCEV(I);
+ if (isa<SCEVCouldNotCompute>(ISE)) return false;
+
+ // Get the start and stride for this expression.
+ Loop *UseLoop = LI->getLoopFor(I->getParent());
+ SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
+ SCEVHandle Stride = Start;
+ bool isSigned;
+
+ if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT))
+ return false; // Non-reducible symbolic expression, bail out.
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // choices that depend on addressing mode use right, although we won't
+    // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DOUT << "FOUND USER in other loop: " << *User
+ << " OF SCEV: " << *ISE << "\n";
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DOUT << "FOUND USER: " << *User
+ << " OF SCEV: " << *ISE << "\n";
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
+ if (!StrideUses) { // First occurrence of this stride?
+ StrideOrder.push_back(Stride);
+ StrideUses = new IVUsersOfOneStride(Stride);
+ IVUses.push_back(StrideUses);
+ IVUsesByStride[Stride] = StrideUses;
+ }
+
+ // Okay, we found a user that we cannot reduce. Analyze the instruction
+ // and decide what to do with it. If we are a use inside of the loop, use
+ // the value before incrementation, otherwise use it after incrementation.
+ if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
+ // The value used will be incremented by the stride more than we are
+ // expecting, so subtract this off.
+ SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
+ StrideUses->addUser(NewStart, User, I, isSigned);
+ StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
+ DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
+ } else {
+ StrideUses->addUser(Start, User, I, isSigned);
+ }
+ }
+ }
+ return true;
+}
+
+IVUsers::IVUsers()
+ : LoopPass(&ID) {
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ AddUsersIfInteresting(I);
+
+ return false;
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace of the given IVStrideUse.
+SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
+ const Type *UseTy = U.getOperandValToReplace()->getType();
+ // Start with zero.
+ SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ // Create the basic add recurrence.
+ RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+ // Add the offset in a separate step, because it may be loop-variant.
+ RetVal = SE->getAddExpr(RetVal, U.getOffset());
+ // For uses of post-incremented values, add an extra stride to compute
+ // the actual replacement value.
+ if (U.isUseOfPostIncrementedValue())
+ RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(U.getUser()->getParent())) {
+ SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
+ if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L))
+ RetVal = ExitVal;
+ }
+ // Promote the result to the type of the use.
+ if (SE->getTypeSizeInBits(RetVal->getType()) !=
+ SE->getTypeSizeInBits(UseTy)) {
+ if (U.isSigned())
+ RetVal = SE->getSignExtendExpr(RetVal, UseTy);
+ else
+ RetVal = SE->getZeroExtendExpr(RetVal, UseTy);
+ }
+ return RetVal;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride*>::const_iterator SI =
+ IVUsesByStride.find(StrideOrder[Stride]);
+ assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
+ OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = ";
+ OS << *getReplacementExpr(*UI);
+ if (UI->isUseOfPostIncrementedValue())
+ OS << " (post-inc)";
+ OS << " in ";
+ UI->getUser()->print(OS);
+ }
+ }
+}
+
+void IVUsers::print(std::ostream &o, const Module *M) const {
+ raw_os_ostream OS(o);
+ print(OS, M);
+}
+
+void IVUsers::dump() const {
+ print(errs());
+}
+
+void IVUsers::releaseMemory() {
+ IVUsesByStride.clear();
+ StrideOrder.clear();
+ Processed.clear();
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->Users.erase(this);
+ // this now dangles!
+}
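
The pass is registered above as an analysis ("iv-users", analysis-only, preserves all), so it can be scheduled programmatically through the factory declared in the header, or inspected from the command line. A hedged sketch, assuming the legacy PassManager interface of this period; the module is whatever the caller has already parsed:

// Sketch: schedule the IVUsers analysis on a module via the legacy PassManager.
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/IVUsers.h"
using namespace llvm;

void runIVUsers(Module &M) {
  PassManager PM;
  PM.add(createIVUsersPass());  // the -iv-users analysis registered above
  PM.run(M);                    // required analyses (LoopInfo, DominatorTree,
                                // ScalarEvolution) are scheduled automatically
}

From opt, the collected uses can be printed with something like "opt -analyze -iv-users input.ll", which invokes the print() routine above for each loop in the input.
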
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 538d137..f68cf0e 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -749,16 +749,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course.
//===---------------------------------------------------------------------===//
-We should be able to evaluate this loop:
-
-int test(int x_offs) {
- while (x_offs > 4)
- x_offs -= 4;
- return x_offs;
-}
-
-//===---------------------------------------------------------------------===//
-
Reassociate should turn things like:
int factorial(int X) {
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3d9017d..80d34f6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -43,6 +43,8 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -51,11 +53,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumRemoved , "Number of aux indvars removed");
@@ -65,6 +68,7 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
namespace {
class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass {
+ IVUsers *IU;
LoopInfo *LI;
ScalarEvolution *SE;
bool Changed;
@@ -76,12 +80,15 @@ namespace {
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LCSSAID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
+ AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<IVUsers>();
AU.addPreservedID(LCSSAID);
AU.setPreservesCFG();
}
@@ -90,17 +97,21 @@ namespace {
void RewriteNonIntegerIVs(Loop *L);
- void LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
+ ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
Value *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
- void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts);
+ void RewriteIVExpressions(Loop *L, const Type *LargestType,
+ SCEVExpander &Rewriter);
- void HandleFloatingPointIV(Loop *L, PHINode *PH,
- SmallPtrSet<Instruction*, 16> &DeadInsts);
+ void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
+
+ void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
};
}
@@ -112,31 +123,12 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions is the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
-void IndVarSimplify::
-DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts) {
- while (!Insts.empty()) {
- Instruction *I = *Insts.begin();
- Insts.erase(I);
- if (isInstructionTriviallyDead(I)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
- Insts.insert(U);
- DOUT << "INDVARS: Deleting: " << *I;
- I->eraseFromParent();
- Changed = true;
- }
- }
-}
-
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
-void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
+ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
SCEVHandle BackedgeTakenCount,
Value *IndVar,
BasicBlock *ExitingBlock,
@@ -196,10 +188,15 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
<< (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
<< " RHS:\t" << *RHS << "\n";
- Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
- BI->setCondition(Cond);
+ ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+
+ Instruction *OrigCond = cast<Instruction>(BI->getCondition());
+ OrigCond->replaceAllUsesWith(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+
++NumLFTR;
Changed = true;
+ return Cond;
}
/// RewriteLoopExitValues - Check to see if this loop has a computable
@@ -207,8 +204,16 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// final value of any expressions that are recurrent in the loop, and
/// substitute the exit values from the loop into any instructions outside of
/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
void IndVarSimplify::RewriteLoopExitValues(Loop *L,
const SCEV *BackedgeTakenCount) {
+  // Verify the input to the pass is already in LCSSA form.
+ assert(L->isLCSSAForm());
+
BasicBlock *Preheader = L->getLoopPreheader();
// Scan all of the instructions in the loop, looking at those that have
@@ -226,9 +231,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
BlockToInsertInto = Preheader;
BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
- bool HasConstantItCount = isa<SCEVConstant>(BackedgeTakenCount);
-
- SmallPtrSet<Instruction*, 16> InstructionsToDelete;
std::map<Instruction*, Value*> ExitValues;
// Find all values that are computed inside the loop, but used outside of it.
@@ -268,18 +270,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
if (!L->contains(Inst->getParent()))
continue;
- // We require that this value either have a computable evolution or that
- // the loop have a constant iteration count. In the case where the loop
- // has a constant iteration count, we can sometimes force evaluation of
- // the exit value through brute force.
- SCEVHandle SH = SE->getSCEV(Inst);
- if (!SH->hasComputableLoopEvolution(L) && !HasConstantItCount)
- continue; // Cannot get exit evolution for the loop value.
-
// Okay, this instruction has a user outside of the current loop
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
- SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ SCEVHandle SH = SE->getSCEV(Inst);
+ SCEVHandle ExitValue = SE->getSCEVAtScope(SH, L->getParentLoop());
if (isa<SCEVCouldNotCompute>(ExitValue) ||
!ExitValue->isLoopInvariant(L))
continue;
@@ -298,9 +293,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
PN->setIncomingValue(i, ExitVal);
- // If this instruction is dead now, schedule it to be removed.
- if (Inst->use_empty())
- InstructionsToDelete.insert(Inst);
+ // If this instruction is dead now, delete it.
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
// See if this is a single-entry LCSSA PHI node. If so, we can (and
// have to) remove
@@ -308,14 +302,12 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// in the loop, so we don't need an LCSSA phi node anymore.
if (NumPreds == 1) {
PN->replaceAllUsesWith(ExitVal);
- PN->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
break;
}
}
}
}
-
- DeleteTriviallyDeadInstructions(InstructionsToDelete);
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
@@ -325,266 +317,24 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
//
BasicBlock *Header = L->getHeader();
- SmallPtrSet<Instruction*, 16> DeadInsts;
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- HandleFloatingPointIV(L, PN, DeadInsts);
- }
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ HandleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
// re-writing, the trip count may be computable.
if (Changed)
SE->forgetLoopBackedgeTakenCount(L);
-
- if (!DeadInsts.empty())
- DeleteTriviallyDeadInstructions(DeadInsts);
-}
-
-/// getEffectiveIndvarType - Determine the widest type that the
-/// induction-variable PHINode Phi is cast to.
-///
-static const Type *getEffectiveIndvarType(const PHINode *Phi,
- const ScalarEvolution *SE) {
- const Type *Ty = Phi->getType();
-
- for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end();
- UI != UE; ++UI) {
- const Type *CandidateType = NULL;
- if (const ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
- CandidateType = ZI->getDestTy();
- else if (const SExtInst *SI = dyn_cast<SExtInst>(UI))
- CandidateType = SI->getDestTy();
- else if (const IntToPtrInst *IP = dyn_cast<IntToPtrInst>(UI))
- CandidateType = IP->getDestTy();
- else if (const PtrToIntInst *PI = dyn_cast<PtrToIntInst>(UI))
- CandidateType = PI->getDestTy();
- if (CandidateType &&
- SE->isSCEVable(CandidateType) &&
- SE->getTypeSizeInBits(CandidateType) > SE->getTypeSizeInBits(Ty))
- Ty = CandidateType;
- }
-
- return Ty;
-}
-
-/// TestOrigIVForWrap - Analyze the original induction variable that
-/// controls the loop's iteration to determine whether it would ever
-/// undergo signed or unsigned overflow.
-///
-/// In addition to setting the NoSignedWrap and NoUnsignedWrap
-/// variables to true when appropriate (they are not set to false here),
-/// return the PHI for this induction variable. Also record the initial
-/// and final values and the increment; these are not meaningful unless
-/// either NoSignedWrap or NoUnsignedWrap is true, and are always meaningful
-/// in that case, although the final value may be 0 indicating a nonconstant.
-///
-/// TODO: This duplicates a fair amount of ScalarEvolution logic.
-/// Perhaps this can be merged with
-/// ScalarEvolution::getBackedgeTakenCount
-/// and/or ScalarEvolution::get{Sign,Zero}ExtendExpr.
-///
-static const PHINode *TestOrigIVForWrap(const Loop *L,
- const BranchInst *BI,
- const Instruction *OrigCond,
- const ScalarEvolution &SE,
- bool &NoSignedWrap,
- bool &NoUnsignedWrap,
- const ConstantInt* &InitialVal,
- const ConstantInt* &IncrVal,
- const ConstantInt* &LimitVal) {
- // Verify that the loop is sane and find the exit condition.
- const ICmpInst *Cmp = dyn_cast<ICmpInst>(OrigCond);
- if (!Cmp) return 0;
-
- const Value *CmpLHS = Cmp->getOperand(0);
- const Value *CmpRHS = Cmp->getOperand(1);
- const BasicBlock *TrueBB = BI->getSuccessor(0);
- const BasicBlock *FalseBB = BI->getSuccessor(1);
- ICmpInst::Predicate Pred = Cmp->getPredicate();
-
- // Canonicalize a constant to the RHS.
- if (isa<ConstantInt>(CmpLHS)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- std::swap(CmpLHS, CmpRHS);
- }
- // Canonicalize SLE to SLT.
- if (Pred == ICmpInst::ICMP_SLE)
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS))
- if (!CI->getValue().isMaxSignedValue()) {
- CmpRHS = ConstantInt::get(CI->getValue() + 1);
- Pred = ICmpInst::ICMP_SLT;
- }
- // Canonicalize SGT to SGE.
- if (Pred == ICmpInst::ICMP_SGT)
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS))
- if (!CI->getValue().isMaxSignedValue()) {
- CmpRHS = ConstantInt::get(CI->getValue() + 1);
- Pred = ICmpInst::ICMP_SGE;
- }
- // Canonicalize SGE to SLT.
- if (Pred == ICmpInst::ICMP_SGE) {
- std::swap(TrueBB, FalseBB);
- Pred = ICmpInst::ICMP_SLT;
- }
- // Canonicalize ULE to ULT.
- if (Pred == ICmpInst::ICMP_ULE)
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS))
- if (!CI->getValue().isMaxValue()) {
- CmpRHS = ConstantInt::get(CI->getValue() + 1);
- Pred = ICmpInst::ICMP_ULT;
- }
- // Canonicalize UGT to UGE.
- if (Pred == ICmpInst::ICMP_UGT)
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS))
- if (!CI->getValue().isMaxValue()) {
- CmpRHS = ConstantInt::get(CI->getValue() + 1);
- Pred = ICmpInst::ICMP_UGE;
- }
- // Canonicalize UGE to ULT.
- if (Pred == ICmpInst::ICMP_UGE) {
- std::swap(TrueBB, FalseBB);
- Pred = ICmpInst::ICMP_ULT;
- }
- // For now, analyze only LT loops for signed overflow.
- if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_ULT)
- return 0;
-
- bool isSigned = Pred == ICmpInst::ICMP_SLT;
-
- // Get the increment instruction. Look past casts if we will
- // be able to prove that the original induction variable doesn't
- // undergo signed or unsigned overflow, respectively.
- const Value *IncrInst = CmpLHS;
- if (isSigned) {
- if (const SExtInst *SI = dyn_cast<SExtInst>(CmpLHS)) {
- if (!isa<ConstantInt>(CmpRHS) ||
- !cast<ConstantInt>(CmpRHS)->getValue()
- .isSignedIntN(SE.getTypeSizeInBits(IncrInst->getType())))
- return 0;
- IncrInst = SI->getOperand(0);
- }
- } else {
- if (const ZExtInst *ZI = dyn_cast<ZExtInst>(CmpLHS)) {
- if (!isa<ConstantInt>(CmpRHS) ||
- !cast<ConstantInt>(CmpRHS)->getValue()
- .isIntN(SE.getTypeSizeInBits(IncrInst->getType())))
- return 0;
- IncrInst = ZI->getOperand(0);
- }
- }
-
- // For now, only analyze induction variables that have simple increments.
- const BinaryOperator *IncrOp = dyn_cast<BinaryOperator>(IncrInst);
- if (!IncrOp || IncrOp->getOpcode() != Instruction::Add)
- return 0;
- IncrVal = dyn_cast<ConstantInt>(IncrOp->getOperand(1));
- if (!IncrVal)
- return 0;
-
- // Make sure the PHI looks like a normal IV.
- const PHINode *PN = dyn_cast<PHINode>(IncrOp->getOperand(0));
- if (!PN || PN->getNumIncomingValues() != 2)
- return 0;
- unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = !IncomingEdge;
- if (!L->contains(PN->getIncomingBlock(BackEdge)) ||
- PN->getIncomingValue(BackEdge) != IncrOp)
- return 0;
- if (!L->contains(TrueBB))
- return 0;
-
- // For now, only analyze loops with a constant start value, so that
- // we can easily determine if the start value is not a maximum value
- // which would wrap on the first iteration.
- InitialVal = dyn_cast<ConstantInt>(PN->getIncomingValue(IncomingEdge));
- if (!InitialVal)
- return 0;
-
- // The upper limit need not be a constant; we'll check later.
- LimitVal = dyn_cast<ConstantInt>(CmpRHS);
-
- // We detect the impossibility of wrapping in two cases, both of
- // which require starting with a non-max value:
- // - The IV counts up by one, and the loop iterates only while it remains
- // less than a limiting value (any) in the same type.
- // - The IV counts up by a positive increment other than 1, and the
- // constant limiting value + the increment is less than the max value
- // (computed as max-increment to avoid overflow)
- if (isSigned && !InitialVal->getValue().isMaxSignedValue()) {
- if (IncrVal->equalsInt(1))
- NoSignedWrap = true; // LimitVal need not be constant
- else if (LimitVal) {
- uint64_t numBits = LimitVal->getValue().getBitWidth();
- if (IncrVal->getValue().sgt(APInt::getNullValue(numBits)) &&
- (APInt::getSignedMaxValue(numBits) - IncrVal->getValue())
- .sgt(LimitVal->getValue()))
- NoSignedWrap = true;
- }
- } else if (!isSigned && !InitialVal->getValue().isMaxValue()) {
- if (IncrVal->equalsInt(1))
- NoUnsignedWrap = true; // LimitVal need not be constant
- else if (LimitVal) {
- uint64_t numBits = LimitVal->getValue().getBitWidth();
- if (IncrVal->getValue().ugt(APInt::getNullValue(numBits)) &&
- (APInt::getMaxValue(numBits) - IncrVal->getValue())
- .ugt(LimitVal->getValue()))
- NoUnsignedWrap = true;
- }
- }
- return PN;
-}
-
-static Value *getSignExtendedTruncVar(const SCEVAddRecExpr *AR,
- ScalarEvolution *SE,
- const Type *LargestType, Loop *L,
- const Type *myType,
- SCEVExpander &Rewriter) {
- SCEVHandle ExtendedStart =
- SE->getSignExtendExpr(AR->getStart(), LargestType);
- SCEVHandle ExtendedStep =
- SE->getSignExtendExpr(AR->getStepRecurrence(*SE), LargestType);
- SCEVHandle ExtendedAddRec =
- SE->getAddRecExpr(ExtendedStart, ExtendedStep, L);
- if (LargestType != myType)
- ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType);
- return Rewriter.expandCodeFor(ExtendedAddRec, myType);
-}
-
-static Value *getZeroExtendedTruncVar(const SCEVAddRecExpr *AR,
- ScalarEvolution *SE,
- const Type *LargestType, Loop *L,
- const Type *myType,
- SCEVExpander &Rewriter) {
- SCEVHandle ExtendedStart =
- SE->getZeroExtendExpr(AR->getStart(), LargestType);
- SCEVHandle ExtendedStep =
- SE->getZeroExtendExpr(AR->getStepRecurrence(*SE), LargestType);
- SCEVHandle ExtendedAddRec =
- SE->getAddRecExpr(ExtendedStart, ExtendedStep, L);
- if (LargestType != myType)
- ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType);
- return Rewriter.expandCodeFor(ExtendedAddRec, myType);
-}
-
-/// allUsesAreSameTyped - See whether all Uses of I are instructions
-/// with the same Opcode and the same type.
-static bool allUsesAreSameTyped(unsigned int Opcode, Instruction *I) {
- const Type* firstType = NULL;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- Instruction *II = dyn_cast<Instruction>(*UI);
- if (!II || II->getOpcode() != Opcode)
- return false;
- if (!firstType)
- firstType = II->getType();
- else if (firstType != II->getType())
- return false;
- }
- return true;
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
@@ -594,11 +344,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteNonIntegerIVs(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *ExitingBlock = L->getExitingBlock();
- SmallPtrSet<Instruction*, 16> DeadInsts;
-
- // Verify the input to the pass in already in LCSSA form.
- assert(L->isLCSSAForm());
+ BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -606,59 +353,45 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop into any instructions outside of the loop that use the final values of
// the current expressions.
//
- SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, BackedgeTakenCount);
- // Next, analyze all of the induction variables in the loop, canonicalizing
- // auxillary induction variables.
- std::vector<std::pair<PHINode*, SCEVHandle> > IndVars;
-
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- if (SE->isSCEVable(PN->getType())) {
- SCEVHandle SCEV = SE->getSCEV(PN);
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SCEV))
- if (AR->getLoop() == L && AR->isAffine())
- IndVars.push_back(std::make_pair(PN, SCEV));
- }
- }
-
- // Compute the type of the largest recurrence expression, and collect
- // the set of the types of the other recurrence expressions.
+ // Compute the type of the largest recurrence expression, and decide whether
+ // a canonical induction variable should be inserted.
const Type *LargestType = 0;
- SmallSetVector<const Type *, 4> SizesToInsert;
+ bool NeedCannIV = false;
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
LargestType = BackedgeTakenCount->getType();
LargestType = SE->getEffectiveSCEVType(LargestType);
- SizesToInsert.insert(LargestType);
+ // If we have a known trip count and a single exit block, we'll be
+ // rewriting the loop exit test condition below, which requires a
+ // canonical induction variable.
+ if (ExitingBlock)
+ NeedCannIV = true;
}
- for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
- const PHINode *PN = IndVars[i].first;
- const Type *PNTy = PN->getType();
- PNTy = SE->getEffectiveSCEVType(PNTy);
- SizesToInsert.insert(PNTy);
- const Type *EffTy = getEffectiveIndvarType(PN, SE);
- EffTy = SE->getEffectiveSCEVType(EffTy);
- SizesToInsert.insert(EffTy);
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ SCEVHandle Stride = IU->StrideOrder[i];
+ const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
if (!LargestType ||
- SE->getTypeSizeInBits(EffTy) >
+ SE->getTypeSizeInBits(Ty) >
SE->getTypeSizeInBits(LargestType))
- LargestType = EffTy;
+ LargestType = Ty;
+
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+
+ if (!SI->second->Users.empty())
+ NeedCannIV = true;
}
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, *LI);
- // Now that we know the largest of of the induction variables in this loop,
- // insert a canonical induction variable of the largest size.
+  // Now that we know the largest of the induction variable expressions
+ // in this loop, insert a canonical induction variable of the largest size.
Value *IndVar = 0;
- if (!SizesToInsert.empty()) {
+ if (NeedCannIV) {
IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
++NumInserted;
Changed = true;
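
For intuition, the canonical induction variable inserted here is the zero-based, unit-stride counter of LargestType (the recurrence {0,+,1} for this loop); the rewrites below re-express the other interesting IV expressions as affine functions of it, and LinearFunctionTestReplace turns the exit test into a canonical != comparison against the trip count. A rough source-level analogy, purely illustrative:

// Source-level analogy of the canonical IV and a rewritten IV expression.
#include <cstdint>

void canonicalIVAnalogy(uint64_t Start, uint64_t Stride, uint64_t TripCount) {
  // indvar plays the role of the canonical IV; the exit test is the
  // canonical != comparison produced by LinearFunctionTestReplace.
  for (uint64_t indvar = 0; indvar != TripCount; ++indvar) {
    uint64_t i = Start + Stride * indvar;  // a rewritten IV expression
    (void)i;                               // ... loop body would use 'i' ...
  }
}
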
@@ -667,229 +400,291 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- bool NoSignedWrap = false;
- bool NoUnsignedWrap = false;
- const ConstantInt* InitialVal, * IncrVal, * LimitVal;
- const PHINode *OrigControllingPHI = 0;
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock)
+ ICmpInst *NewICmp = 0;
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) {
+ assert(NeedCannIV &&
+ "LinearFunctionTestReplace requires a canonical induction variable");
// Can't rewrite non-branch yet.
- if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) {
- if (Instruction *OrigCond = dyn_cast<Instruction>(BI->getCondition())) {
- // Determine if the OrigIV will ever undergo overflow.
- OrigControllingPHI =
- TestOrigIVForWrap(L, BI, OrigCond, *SE,
- NoSignedWrap, NoUnsignedWrap,
- InitialVal, IncrVal, LimitVal);
-
- // We'll be replacing the original condition, so it'll be dead.
- DeadInsts.insert(OrigCond);
- }
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+ NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ ExitingBlock, BI, Rewriter);
+ }
- LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- ExitingBlock, BI, Rewriter);
- }
+ Rewriter.setInsertionPoint(Header->getFirstNonPHI());
- // Now that we have a canonical induction variable, we can rewrite any
- // recurrences in terms of the induction variable. Start with the auxillary
- // induction variables, and recursively rewrite any of their uses.
- BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
- Rewriter.setInsertionPoint(InsertPt);
-
- // If there were induction variables of other sizes, cast the primary
- // induction variable to the right size for them, avoiding the need for the
- // code evaluation methods to insert induction variables of different sizes.
- for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) {
- const Type *Ty = SizesToInsert[i];
- if (Ty != LargestType) {
- Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt);
- Rewriter.addInsertedValue(New, SE->getSCEV(New));
- DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": "
- << *New << "\n";
- }
- }
+ // Rewrite IV-derived expressions.
+ RewriteIVExpressions(L, LargestType, Rewriter);
- // Rewrite all induction variables in terms of the canonical induction
- // variable.
- while (!IndVars.empty()) {
- PHINode *PN = IndVars.back().first;
- const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(IndVars.back().second);
- Value *NewVal = Rewriter.expandCodeFor(AR, PN->getType());
- DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *PN
- << " into = " << *NewVal << "\n";
- NewVal->takeName(PN);
-
- /// If the new canonical induction variable is wider than the original,
- /// and the original has uses that are casts to wider types, see if the
- /// truncate and extend can be omitted.
- if (PN == OrigControllingPHI && PN->getType() != LargestType)
- for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
- UI != UE; ++UI) {
- Instruction *UInst = dyn_cast<Instruction>(*UI);
- if (UInst && isa<SExtInst>(UInst) && NoSignedWrap) {
- Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L,
- UInst->getType(), Rewriter);
- UInst->replaceAllUsesWith(TruncIndVar);
- DeadInsts.insert(UInst);
- }
- // See if we can figure out sext(i+constant) doesn't wrap, so we can
- // use a larger add. This is common in subscripting.
- if (UInst && UInst->getOpcode()==Instruction::Add &&
- !UInst->use_empty() &&
- allUsesAreSameTyped(Instruction::SExt, UInst) &&
- isa<ConstantInt>(UInst->getOperand(1)) &&
- NoSignedWrap && LimitVal) {
- uint64_t oldBitSize = LimitVal->getValue().getBitWidth();
- uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
- ConstantInt* AddRHS = dyn_cast<ConstantInt>(UInst->getOperand(1));
- if (((APInt::getSignedMaxValue(oldBitSize) - IncrVal->getValue()) -
- AddRHS->getValue()).sgt(LimitVal->getValue())) {
- // We've determined this is (i+constant) and it won't overflow.
- if (isa<SExtInst>(UInst->use_begin())) {
- SExtInst* oldSext = dyn_cast<SExtInst>(UInst->use_begin());
- uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits();
- Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
- L, oldSext->getType(), Rewriter);
- APInt APnewAddRHS = APInt(AddRHS->getValue()).sext(newBitSize);
- if (newBitSize > truncSize)
- APnewAddRHS = APnewAddRHS.trunc(truncSize);
- ConstantInt* newAddRHS =ConstantInt::get(APnewAddRHS);
- Value *NewAdd =
- BinaryOperator::CreateAdd(TruncIndVar, newAddRHS,
- UInst->getName()+".nosex", UInst);
- for (Value::use_iterator UI2 = UInst->use_begin(),
- UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
- Instruction *II = dyn_cast<Instruction>(UI2);
- II->replaceAllUsesWith(NewAdd);
- DeadInsts.insert(II);
- }
- DeadInsts.insert(UInst);
- }
- }
- }
- // Try for sext(i | constant). This is safe as long as the
- // high bit of the constant is not set.
- if (UInst && UInst->getOpcode()==Instruction::Or &&
- !UInst->use_empty() &&
- allUsesAreSameTyped(Instruction::SExt, UInst) && NoSignedWrap &&
- isa<ConstantInt>(UInst->getOperand(1))) {
- ConstantInt* RHS = dyn_cast<ConstantInt>(UInst->getOperand(1));
- if (!RHS->getValue().isNegative()) {
- uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
- SExtInst* oldSext = dyn_cast<SExtInst>(UInst->use_begin());
- uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits();
- Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
- L, oldSext->getType(), Rewriter);
- APInt APnewOrRHS = APInt(RHS->getValue()).sext(newBitSize);
- if (newBitSize > truncSize)
- APnewOrRHS = APnewOrRHS.trunc(truncSize);
- ConstantInt* newOrRHS =ConstantInt::get(APnewOrRHS);
- Value *NewOr =
- BinaryOperator::CreateOr(TruncIndVar, newOrRHS,
- UInst->getName()+".nosex", UInst);
- for (Value::use_iterator UI2 = UInst->use_begin(),
- UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
- Instruction *II = dyn_cast<Instruction>(UI2);
- II->replaceAllUsesWith(NewOr);
- DeadInsts.insert(II);
- }
- DeadInsts.insert(UInst);
- }
- }
- // A zext of a signed variable known not to overflow is still safe.
- if (UInst && isa<ZExtInst>(UInst) && (NoUnsignedWrap || NoSignedWrap)) {
- Value *TruncIndVar = getZeroExtendedTruncVar(AR, SE, LargestType, L,
- UInst->getType(), Rewriter);
- UInst->replaceAllUsesWith(TruncIndVar);
- DeadInsts.insert(UInst);
- }
- // If we have zext(i&constant), it's always safe to use the larger
- // variable. This is not common but is a bottleneck in Openssl.
- // (RHS doesn't have to be constant. There should be a better approach
- // than bottom-up pattern matching for this...)
- if (UInst && UInst->getOpcode()==Instruction::And &&
- !UInst->use_empty() &&
- allUsesAreSameTyped(Instruction::ZExt, UInst) &&
- isa<ConstantInt>(UInst->getOperand(1))) {
- uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
- ConstantInt* AndRHS = dyn_cast<ConstantInt>(UInst->getOperand(1));
- ZExtInst* oldZext = dyn_cast<ZExtInst>(UInst->use_begin());
- uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits();
- Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
- L, oldZext->getType(), Rewriter);
- APInt APnewAndRHS = APInt(AndRHS->getValue()).zext(newBitSize);
- if (newBitSize > truncSize)
- APnewAndRHS = APnewAndRHS.trunc(truncSize);
- ConstantInt* newAndRHS = ConstantInt::get(APnewAndRHS);
- Value *NewAnd =
- BinaryOperator::CreateAnd(TruncIndVar, newAndRHS,
- UInst->getName()+".nozex", UInst);
- for (Value::use_iterator UI2 = UInst->use_begin(),
- UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
- Instruction *II = dyn_cast<Instruction>(UI2);
- II->replaceAllUsesWith(NewAnd);
- DeadInsts.insert(II);
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L, Rewriter);
+
+ // Reorder instructions to avoid use-before-def conditions.
+ FixUsesBeforeDefs(L, Rewriter);
+
+ // For completeness, inform IVUsers of the IV use in the newly-created
+ // loop exit test instruction.
+ if (NewICmp)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+
+ // Clean up dead instructions.
+ DeleteDeadPHIs(L->getHeader());
+ // Check a post-condition.
+ assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
+ return Changed;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
+ SCEVExpander &Rewriter) {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
+ //
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ SCEVHandle Stride = IU->StrideOrder[i];
+
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ ilist<IVStrideUse> &List = SI->second->Users;
+ for (ilist<IVStrideUse>::iterator UI = List.begin(),
+ E = List.end(); UI != E; ++UI) {
+ SCEVHandle Offset = UI->getOffset();
+ Value *Op = UI->getOperandValToReplace();
+ Instruction *User = UI->getUser();
+ bool isSigned = UI->isSigned();
+
+ // Compute the final addrec to expand into code.
+ SCEVHandle AR = IU->getReplacementExpr(*UI);
+
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!Stride->isLoopInvariant(L) &&
+ !isa<SCEVConstant>(AR) &&
+ L->contains(User->getParent()))
+ continue;
+
+ Value *NewVal = 0;
+ if (AR->isLoopInvariant(L)) {
+ BasicBlock::iterator I = Rewriter.getInsertionPoint();
+ // Expand loop-invariant values in the loop preheader. They will
+ // be sunk to the exit block later, if possible.
+ NewVal =
+ Rewriter.expandCodeFor(AR, LargestType,
+ L->getLoopPreheader()->getTerminator());
+ Rewriter.setInsertionPoint(I);
+ ++NumReplaced;
+ } else {
+ const Type *IVTy = Offset->getType();
+ const Type *UseTy = Op->getType();
+
+ // Promote the Offset and Stride up to the canonical induction
+ // variable's bit width.
+ SCEVHandle PromotedOffset = Offset;
+ SCEVHandle PromotedStride = Stride;
+ if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
+ // It doesn't matter for correctness whether zero or sign extension
+ // is used here, since the value is truncated away below, but if the
+ // value is signed, sign extension is more likely to be folded.
+ if (isSigned) {
+ PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
+ PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
+ } else {
+ PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
+ // If the stride is obviously negative, use sign extension to
+ // produce things like x-1 instead of x+255.
+ if (isa<SCEVConstant>(PromotedStride) &&
+ cast<SCEVConstant>(PromotedStride)
+ ->getValue()->getValue().isNegative())
+ PromotedStride = SE->getSignExtendExpr(PromotedStride,
+ LargestType);
+ else
+ PromotedStride = SE->getZeroExtendExpr(PromotedStride,
+ LargestType);
}
- DeadInsts.insert(UInst);
}
- // If we have zext((i+constant)&constant), we can use the larger
- // variable even if the add does overflow. This works whenever the
- // constant being ANDed is the same size as i, which it presumably is.
- // We don't need to restrict the expression being and'ed to i+const,
- // but we have to promote everything in it, so it's convenient.
- // zext((i | constant)&constant) is also valid and accepted here.
- if (UInst && (UInst->getOpcode()==Instruction::Add ||
- UInst->getOpcode()==Instruction::Or) &&
- UInst->hasOneUse() &&
- isa<ConstantInt>(UInst->getOperand(1))) {
- uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
- ConstantInt* AddRHS = dyn_cast<ConstantInt>(UInst->getOperand(1));
- Instruction *UInst2 = dyn_cast<Instruction>(UInst->use_begin());
- if (UInst2 && UInst2->getOpcode() == Instruction::And &&
- !UInst2->use_empty() &&
- allUsesAreSameTyped(Instruction::ZExt, UInst2) &&
- isa<ConstantInt>(UInst2->getOperand(1))) {
- ZExtInst* oldZext = dyn_cast<ZExtInst>(UInst2->use_begin());
- uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits();
- Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
- L, oldZext->getType(), Rewriter);
- ConstantInt* AndRHS = dyn_cast<ConstantInt>(UInst2->getOperand(1));
- APInt APnewAddRHS = APInt(AddRHS->getValue()).zext(newBitSize);
- if (newBitSize > truncSize)
- APnewAddRHS = APnewAddRHS.trunc(truncSize);
- ConstantInt* newAddRHS = ConstantInt::get(APnewAddRHS);
- Value *NewAdd = ((UInst->getOpcode()==Instruction::Add) ?
- BinaryOperator::CreateAdd(TruncIndVar, newAddRHS,
- UInst->getName()+".nozex", UInst2) :
- BinaryOperator::CreateOr(TruncIndVar, newAddRHS,
- UInst->getName()+".nozex", UInst2));
- APInt APcopy2 = APInt(AndRHS->getValue());
- ConstantInt* newAndRHS = ConstantInt::get(APcopy2.zext(newBitSize));
- Value *NewAnd =
- BinaryOperator::CreateAnd(NewAdd, newAndRHS,
- UInst->getName()+".nozex", UInst2);
- for (Value::use_iterator UI2 = UInst2->use_begin(),
- UE2 = UInst2->use_end(); UI2 != UE2; ++UI2) {
- Instruction *II = dyn_cast<Instruction>(UI2);
- II->replaceAllUsesWith(NewAnd);
- DeadInsts.insert(II);
- }
- DeadInsts.insert(UInst);
- DeadInsts.insert(UInst2);
+
+ // Create the SCEV representing the offset from the canonical
+ // induction variable, still in the canonical induction variable's
+ // type, so that all expanded arithmetic is done in the same type.
+ SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
+ PromotedStride, L);
+ // Add the PromotedOffset as a separate step, because it may not be
+ // loop-invariant.
+ NewAR = SE->getAddExpr(NewAR, PromotedOffset);
+
+ // Expand the addrec into instructions.
+ Value *V = Rewriter.expandCodeFor(NewAR, LargestType);
+
+ // Insert an explicit cast if necessary to truncate the value
+ // down to the original stride type. This is done outside of
+ // SCEVExpander because in SCEV expressions, a truncate of an
+ // addrec is always folded.
+ if (LargestType != IVTy) {
+ if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
+ NewAR = SE->getTruncateExpr(NewAR, IVTy);
+ if (Rewriter.isInsertedExpression(NewAR))
+ V = Rewriter.expandCodeFor(NewAR, IVTy);
+ else {
+ V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
+ IVTy, false),
+ V, IVTy);
+ assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
+ "LargestType wasn't actually the largest type!");
+ // Force the rewriter to use this trunc whenever this addrec
+ // appears so that it doesn't insert new phi nodes or
+ // arithmetic in a different type.
+ Rewriter.addInsertedValue(V, NewAR);
}
}
+
+ DOUT << "INDVARS: Made offset-and-trunc IV for offset "
+ << *IVTy << " " << *Offset << ": ";
+ DEBUG(WriteAsOperand(*DOUT, V, false));
+ DOUT << "\n";
+
+ // Now expand it into actual Instructions and patch it into place.
+ NewVal = Rewriter.expandCodeFor(AR, UseTy);
}
- // Replace the old PHI Node with the inserted computation.
- PN->replaceAllUsesWith(NewVal);
- DeadInsts.insert(PN);
- IndVars.pop_back();
- ++NumRemoved;
- Changed = true;
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+ DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op
+ << " into = " << *NewVal << "\n";
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
}
- DeleteTriviallyDeadInstructions(DeadInsts);
- assert(L->isLCSSAForm());
- return Changed;
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty()) {
+ Instruction *Inst = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ if (Inst)
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+}
+
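RewriteIVExpressions promotes each use's offset and stride to the widest IV type, expands one canonical addrec there, and truncates the result back to the use's type; as the comments above note, that truncation is what makes the zero-vs-sign-extension choice a code-quality question rather than a correctness one. A minimal standalone sketch of the equivalence (plain C++, not the LLVM API; the i8 IV {5,+,-1} is a made-up example):

#include <cassert>
#include <cstdint>

int main() {
  const int8_t Stride = -1, Offset = 5;           // hypothetical i8 IV {5,+,-1}
  for (uint64_t i = 0; i < 1000; ++i) {
    // Reference: the IV value computed directly in 8 bits.
    uint8_t Direct = (uint8_t)(Offset + Stride * (int64_t)i);
    // Promoted: stride and offset extended to 64 bits (either way), the
    // addrec evaluated there, and the result truncated back to 8 bits.
    uint64_t SExtVal = (uint64_t)((int64_t)Stride * (int64_t)i + Offset);
    uint64_t ZExtVal = (uint64_t)(uint8_t)Stride * i + (uint64_t)(uint8_t)Offset;
    assert((uint8_t)SExtVal == Direct && (uint8_t)ZExtVal == Direct);
  }
  return 0;
}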
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ Instruction *NonPHI = ExitBlock->getFirstNonPHI();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader. Only
+ // consider those new instructions.
+ if (!Rewriter.isInsertedInstruction(I))
+ break;
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(UI)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+ if (I != Preheader->begin())
+ --I;
+ else
+ Done = true;
+ ToMove->moveBefore(NonPHI);
+ if (Done)
+ break;
+ }
+}
+
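SinkUnusedInvariants scans the preheader bottom-up, stopping at the first instruction the expander did not insert, and moves every inserted value with no use in (or before) the loop down to the exit block. A minimal standalone sketch of that scan, assuming a toy instruction record in place of the real IR (not the LLVM API):

#include <iostream>
#include <list>
#include <string>

struct Inst { std::string Name; bool Inserted; bool UsedInLoop; };

int main() {
  // Hypothetical preheader contents, oldest first.
  std::list<Inst> Preheader = {
    {"n.addr", false, true},   // pre-existing: the scan stops here
    {"inv0",   true,  false},  // inserted, unused in the loop -> sink
    {"inv1",   true,  true},   // inserted, used in the loop   -> stays
    {"inv2",   true,  false},  // inserted, unused in the loop -> sink
  };
  std::list<Inst> ExitBlock;

  std::list<Inst>::iterator I = Preheader.end();
  while (I != Preheader.begin()) {
    --I;
    if (!I->Inserted) break;      // only consider newly inserted instructions
    if (I->UsedInLoop) continue;  // the def must remain in the preheader
    std::list<Inst>::iterator ToMove = I;
    bool Done = (ToMove == Preheader.begin());
    ExitBlock.push_back(*ToMove); // stands in for moveBefore(first non-PHI)
    I = Preheader.erase(ToMove);  // sink it out of the preheader
    if (Done) break;
  }

  for (const Inst &X : ExitBlock) std::cout << "sunk: " << X.Name << "\n";
  return 0;
}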
+/// Re-schedule the inserted instructions to put defs before uses. This
+/// fixes problems that arise when SCEV expressions contain loop-variant
+/// values unrelated to the induction variable which are defined inside the
+/// loop. FIXME: It would be better to insert instructions in the right
+/// place so that this step isn't needed.
+void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
+ // Visit all the blocks in the loop in pre-order dom-tree dfs order.
+ DominatorTree *DT = &getAnalysis<DominatorTree>();
+ std::map<Instruction *, unsigned> NumPredsLeft;
+ SmallVector<DomTreeNode *, 16> Worklist;
+ Worklist.push_back(DT->getNode(L->getHeader()));
+ do {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
+ if (L->contains((*I)->getBlock()))
+ Worklist.push_back(*I);
+ BasicBlock *BB = Node->getBlock();
+ // Visit all the instructions in the block top down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Count the number of operands that aren't properly dominating.
+ unsigned NumPreds = 0;
+ if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
+ for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (Instruction *Inst = dyn_cast<Instruction>(OI))
+ if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
+ ++NumPreds;
+ NumPredsLeft[I] = NumPreds;
+ // Notify uses of the position of this instruction, and move the
+ // users (and their dependents, recursively) into place after this
+ // instruction if it is their last outstanding operand.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Instruction *Inst = cast<Instruction>(UI);
+ std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst);
+ if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
+ SmallVector<Instruction *, 4> UseWorkList;
+ UseWorkList.push_back(Inst);
+ BasicBlock::iterator InsertPt = next(I);
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ do {
+ Instruction *Use = UseWorkList.pop_back_val();
+ Use->moveBefore(InsertPt);
+ NumPredsLeft.erase(Use);
+ for (Value::use_iterator IUI = Use->use_begin(),
+ IUE = Use->use_end(); IUI != IUE; ++IUI) {
+ Instruction *IUIInst = cast<Instruction>(IUI);
+ if (L->contains(IUIInst->getParent()) &&
+ Rewriter.isInsertedInstruction(IUIInst) &&
+ !isa<PHINode>(IUIInst))
+ UseWorkList.push_back(IUIInst);
+ }
+ } while (!UseWorkList.empty());
+ }
+ }
+ }
+ } while (!Worklist.empty());
}
/// Return true if it is OK to use SIToFPInst for an induction variable
@@ -933,8 +728,7 @@ static bool convertToInt(const APFloat &APF, uint64_t *intVal) {
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH,
- SmallPtrSet<Instruction*, 16> &DeadInsts) {
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
@@ -1041,25 +835,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH,
ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
EC->getParent()->getTerminator());
+ // In the following deletions, PH may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PH;
+
 // Delete old, floating point, exit comparison instruction.
EC->replaceAllUsesWith(NewEC);
- DeadInsts.insert(EC);
+ RecursivelyDeleteTriviallyDeadInstructions(EC);
// Delete old, floating point, increment instruction.
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- DeadInsts.insert(Incr);
-
- // Replace floating induction variable. Give SIToFPInst preference over
- // UIToFPInst because it is faster on platforms that are widely used.
- if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
- SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
- PH->getParent()->getFirstNonPHI());
- PH->replaceAllUsesWith(Conv);
- } else {
- UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
- PH->getParent()->getFirstNonPHI());
- PH->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // Replace floating induction variable, if it isn't already deleted.
+ // Give SIToFPInst preference over UIToFPInst because it is faster on
+ // platforms that are widely used.
+ if (WeakPH && !PH->use_empty()) {
+ if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
+ SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+ PH->getParent()->getFirstNonPHI());
+ PH->replaceAllUsesWith(Conv);
+ } else {
+ UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+ PH->getParent()->getFirstNonPHI());
+ PH->replaceAllUsesWith(Conv);
+ }
+ RecursivelyDeleteTriviallyDeadInstructions(PH);
}
- DeadInsts.insert(PH);
-}
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ IU->AddUsersIfInteresting(NewPHI);
+}
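HandleFloatingPointIV now registers the replacement integer PHI with IVUsers and uses a WeakVH so it notices if PH itself is deleted by the cleanup calls. At the source level, the transform it performs amounts to the following (a hand-written illustration, not compiler output):

// Before: floating-point induction variable, FP increment and FP compare.
void before(void (*bar)(double)) {
  for (double d = 0.0; d < 10000.0; d += 1.0)
    bar(d);
}

// After: integer induction variable with a single int->FP conversion;
// SIToFP is preferred over UIToFP where the range allows it.
void after(void (*bar)(double)) {
  for (int i = 0; i < 10000; ++i)
    bar((double)i);
}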
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 127ef56..4f6d531 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -20,6 +20,7 @@
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -53,40 +54,6 @@ namespace {
struct BasedUser;
- /// IVStrideUse - Keep track of one use of a strided induction variable, where
- /// the stride is stored externally. The Offset member keeps track of the
- /// offset from the IV, User is the actual user of the operand, and
- /// 'OperandValToReplace' is the operand of the User that is the use.
- struct VISIBILITY_HIDDEN IVStrideUse {
- SCEVHandle Offset;
- Instruction *User;
- Value *OperandValToReplace;
-
- // isUseOfPostIncrementedValue - True if this should use the
- // post-incremented version of this IV, not the preincremented version.
- // This can only be set in special cases, such as the terminating setcc
- // instruction for a loop or uses dominated by the loop.
- bool isUseOfPostIncrementedValue;
-
- IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
- : Offset(Offs), User(U), OperandValToReplace(O),
- isUseOfPostIncrementedValue(false) {}
- };
-
- /// IVUsersOfOneStride - This structure keeps track of all instructions that
- /// have an operand that is based on the trip count multiplied by some stride.
- /// The stride for all of these users is common and kept external to this
- /// structure.
- struct VISIBILITY_HIDDEN IVUsersOfOneStride {
- /// Users - Keep track of all of the users of this stride as well as the
- /// initial value and the operand that uses the IV.
- std::vector<IVStrideUse> Users;
-
- void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
- Users.push_back(IVStrideUse(Offset, User, Operand));
- }
- };
-
/// IVInfo - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
@@ -110,15 +77,12 @@ namespace {
};
class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
+ IVUsers *IU;
LoopInfo *LI;
DominatorTree *DT;
ScalarEvolution *SE;
bool Changed;
- /// IVUsesByStride - Keep track of all uses of induction variables that we
- /// are interested in. The key of the map is the stride of the access.
- std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride;
-
/// IVsByStride - Keep track of all IVs that have been inserted for a
/// particular stride.
std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
@@ -127,14 +91,9 @@ namespace {
/// reused (nor should they be rewritten to reuse other strides).
SmallSet<SCEVHandle, 4> StrideNoReuse;
- /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
- /// We use this to iterate over the IVUsesByStride collection without being
- /// dependent on random ordering of pointers in the process.
- SmallVector<SCEVHandle, 16> StrideOrder;
-
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
- SmallVector<Instruction*, 16> DeadInsts;
+ SmallVector<WeakVH, 16> DeadInsts;
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
@@ -161,11 +120,11 @@ namespace {
AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
}
private:
- bool AddUsersIfInteresting(Instruction *I, Loop *L,
- SmallPtrSet<Instruction*,16> &Processed);
ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride);
@@ -191,6 +150,8 @@ namespace {
const std::vector<BasedUser>& UsersToProcess);
bool ValidScale(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
+ bool ValidOffset(bool, int64_t, int64_t,
+ const std::vector<BasedUser>& UsersToProcess);
SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
@@ -242,21 +203,8 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
- // Sort the deadinsts list so that we can trivially eliminate duplicates as we
- // go. The code below never adds a non-dead instruction to the worklist, but
- // callers may not be so careful.
- array_pod_sort(DeadInsts.begin(), DeadInsts.end());
-
- // Drop duplicate instructions and those with uses.
- for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) {
- Instruction *I = DeadInsts[i];
- if (!I->use_empty()) DeadInsts[i] = 0;
- while (i != e && DeadInsts[i+1] == I)
- DeadInsts[++i] = 0;
- }
-
while (!DeadInsts.empty()) {
- Instruction *I = DeadInsts.back();
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
if (I == 0 || !isInstructionTriviallyDead(I))
@@ -313,111 +261,6 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
return false;
}
-/// getSCEVStartAndStride - Compute the start and stride of this expression,
-/// returning false if the expression is not a start/stride pair, or true if it
-/// is. The stride must be a loop invariant expression, but the start may be
-/// a mix of loop invariant and loop variant expressions. The start cannot,
-/// however, contain an AddRec from a different loop, unless that loop is an
-/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
- SCEVHandle &Start, SCEVHandle &Stride,
- ScalarEvolution *SE, DominatorTree *DT) {
- SCEVHandle TheAddRec = Start; // Initialize to zero.
-
- // If the outer level is an AddExpr, the operands are all start values except
- // for a nested AddRecExpr.
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
- for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
- if (const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
- if (AddRec->getLoop() == L)
- TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
- else
- return false; // Nested IV of some sort?
- } else {
- Start = SE->getAddExpr(Start, AE->getOperand(i));
- }
-
- } else if (isa<SCEVAddRecExpr>(SH)) {
- TheAddRec = SH;
- } else {
- return false; // not analyzable.
- }
-
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
- if (!AddRec || AddRec->getLoop() != L) return false;
-
- // FIXME: Generalize to non-affine IV's.
- if (!AddRec->isAffine()) return false;
-
- // If Start contains an SCEVAddRecExpr from a different loop, other than an
- // outer loop of the current loop, reject it. SCEV has no concept of
- // operating on more than one loop at a time so don't confuse it with such
- // expressions.
- if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L))
- return false;
-
- Start = SE->getAddExpr(Start, AddRec->getOperand(0));
-
- if (!isa<SCEVConstant>(AddRec->getOperand(1))) {
- // If stride is an instruction, make sure it dominates the loop preheader.
- // Otherwise we could end up with a use before def situation.
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!AddRec->getOperand(1)->dominates(Preheader, DT))
- return false;
-
- DOUT << "[" << L->getHeader()->getName()
- << "] Variable stride: " << *AddRec << "\n";
- }
-
- Stride = AddRec->getOperand(1);
- return true;
-}
-
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value. If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
- Loop *L, DominatorTree *DT, Pass *P,
- SmallVectorImpl<Instruction*> &DeadInsts){
- // If the user is in the loop, use the preinc value.
- if (L->contains(User->getParent())) return false;
-
- BasicBlock *LatchBlock = L->getLoopLatch();
-
- // Ok, the user is outside of the loop. If it is dominated by the latch
- // block, use the post-inc value.
- if (DT->dominates(LatchBlock, User->getParent()))
- return true;
-
- // There is one case we have to be careful of: PHI nodes. These little guys
- // can live in blocks that do not dominate the latch block, but (since their
- // uses occur in the predecessor block, not the block the PHI lives in) should
- // still use the post-inc value. Check for this case now.
- PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN) return false; // not a phi, not dominated by latch block.
-
- // Look at all of the uses of IV by the PHI node. If any use corresponds to
- // a block that is not dominated by the latch block, give up and use the
- // preincremented value.
- unsigned NumUses = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == IV) {
- ++NumUses;
- if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
- }
-
- // Okay, all uses of IV by PN are in predecessor blocks that really are
- // dominated by the latch block. Use the post-incremented value.
- return true;
-}
-
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
@@ -467,90 +310,6 @@ static const Type *getAccessType(const Instruction *Inst) {
return UseTy;
}
-/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
-/// reducible SCEV, recursively add its users to the IVUsesByStride set and
-/// return true. Otherwise, return false.
-bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
- SmallPtrSet<Instruction*,16> &Processed) {
- if (!SE->isSCEVable(I->getType()))
- return false; // Void and FP expressions cannot be reduced.
-
- // LSR is not APInt clean, do not touch integers bigger than 64-bits.
- if (SE->getTypeSizeInBits(I->getType()) > 64)
- return false;
-
- if (!Processed.insert(I))
- return true; // Instruction already handled.
-
- // Get the symbolic expression for this instruction.
- SCEVHandle ISE = SE->getSCEV(I);
- if (isa<SCEVCouldNotCompute>(ISE)) return false;
-
- // Get the start and stride for this expression.
- SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
- SCEVHandle Stride = Start;
- if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT))
- return false; // Non-reducible symbolic expression, bail out.
-
- std::vector<Instruction *> IUsers;
- // Collect all I uses now because IVUseShouldUsePostIncValue may
- // invalidate use_iterator.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
- IUsers.push_back(cast<Instruction>(*UI));
-
- for (unsigned iused_index = 0, iused_size = IUsers.size();
- iused_index != iused_size; ++iused_index) {
-
- Instruction *User = IUsers[iused_index];
-
- // Do not infinitely recurse on PHI nodes.
- if (isa<PHINode>(User) && Processed.count(User))
- continue;
-
- // Descend recursively, but not into PHI nodes outside the current loop.
- // It's important to see the entire expression outside the loop to get
- // choices that depend on addressing mode use right, although we won't
- // consider references ouside the loop in all cases.
- // If User is already in Processed, we don't want to recurse into it again,
- // but do want to record a second reference in the same instruction.
- bool AddUserToIVUsers = false;
- if (LI->getLoopFor(User->getParent()) != L) {
- if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User, L, Processed)) {
- DOUT << "FOUND USER in other loop: " << *User
- << " OF SCEV: " << *ISE << "\n";
- AddUserToIVUsers = true;
- }
- } else if (Processed.count(User) ||
- !AddUsersIfInteresting(User, L, Processed)) {
- DOUT << "FOUND USER: " << *User
- << " OF SCEV: " << *ISE << "\n";
- AddUserToIVUsers = true;
- }
-
- if (AddUserToIVUsers) {
- IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
- if (StrideUses.Users.empty()) // First occurrence of this stride?
- StrideOrder.push_back(Stride);
-
- // Okay, we found a user that we cannot reduce. Analyze the instruction
- // and decide what to do with it. If we are a use inside of the loop, use
- // the value before incrementation, otherwise use it after incrementation.
- if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
- // The value used will be incremented by the stride more than we are
- // expecting, so subtract this off.
- SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
- StrideUses.addUser(NewStart, User, I);
- StrideUses.Users.back().isUseOfPostIncrementedValue = true;
- DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
- } else {
- StrideUses.addUser(Start, User, I);
- }
- }
- }
- return true;
-}
-
namespace {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
@@ -571,6 +330,13 @@ namespace {
/// EmittedBase.
Value *OperandValToReplace;
+ /// isSigned - The stride (and thus also the Base) of this use may be in
+ /// a narrower type than the use itself (OperandValToReplace->getType()).
+ /// When this is the case, the isSigned field indicates whether the
+ /// IV expression should be signed-extended instead of zero-extended to
+ /// fit the type of the use.
+ bool isSigned;
+
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
@@ -589,10 +355,11 @@ namespace {
bool isUseOfPostIncrementedValue;
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
- : SE(se), Base(IVSU.Offset), Inst(IVSU.User),
- OperandValToReplace(IVSU.OperandValToReplace),
+ : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+ OperandValToReplace(IVSU.getOperandValToReplace()),
+ isSigned(IVSU.isSigned()),
Imm(SE->getIntegerSCEV(0, Base->getType())),
- isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
+ isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
@@ -600,7 +367,7 @@ namespace {
void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<Instruction*> &DeadInsts);
+ SmallVectorImpl<WeakVH> &DeadInsts);
Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
const Type *Ty,
@@ -638,19 +405,27 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
InsertLoop = InsertLoop->getParentLoop();
}
- Value *Base = Rewriter.expandCodeFor(NewBase, Ty, BaseInsertPt);
+ Value *Base = Rewriter.expandCodeFor(NewBase, NewBase->getType(),
+ BaseInsertPt);
+
+ SCEVHandle NewValSCEV = SE->getUnknown(Base);
// If there is no immediate value, skip the next part.
- if (Imm->isZero())
- return Base;
+ if (!Imm->isZero()) {
+ // If we are inserting the base and imm values in the same block, make sure
+ // to adjust the IP position if insertion reused a result.
+ if (IP == BaseInsertPt)
+ IP = Rewriter.getInsertionPoint();
+
+ // Always emit the immediate (if non-zero) into the same block as the user.
+ NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
+ }
+
+ if (isSigned)
+ NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
+ else
+ NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
- // If we are inserting the base and imm values in the same block, make sure to
- // adjust the IP position if insertion reused a result.
- if (IP == BaseInsertPt)
- IP = Rewriter.getInsertionPoint();
-
- // Always emit the immediate (if non-zero) into the same block as the user.
- SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm);
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
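The isSigned flag carried on BasedUser matters in InsertCodeForBaseAtPosition when the expanded value must be widened to the use's type: truncation is the same either way, but sign and zero extension disagree on negative values. A tiny standalone illustration (plain C++, made-up values):

#include <cassert>
#include <cstdint>

int main() {
  int8_t NarrowVal = -1;                        // IV value computed in i8
  int64_t SExt = (int64_t)NarrowVal;            // a signed use should see -1
  int64_t ZExt = (int64_t)(uint8_t)NarrowVal;   // an unsigned use should see 255
  assert(SExt == -1 && ZExt == 255);
  return 0;
}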
@@ -664,7 +439,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<Instruction*> &DeadInsts){
+ SmallVectorImpl<WeakVH> &DeadInsts) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
@@ -1158,6 +933,39 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
return true;
}
+/// ValidOffset - Check whether the given Offset is valid for all loads and
+/// stores in UsersToProcess.
+///
+bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
+ int64_t Offset,
+ int64_t Scale,
+ const std::vector<BasedUser>& UsersToProcess) {
+ if (!TLI)
+ return true;
+
+ for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
+ // If this is a load or other access, pass the type of the access in.
+ const Type *AccessTy = Type::VoidTy;
+ if (isAddressUse(UsersToProcess[i].Inst,
+ UsersToProcess[i].OperandValToReplace))
+ AccessTy = getAccessType(UsersToProcess[i].Inst);
+ else if (isa<PHINode>(UsersToProcess[i].Inst))
+ continue;
+
+ TargetLowering::AddrMode AM;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
+ AM.BaseOffs = SC->getValue()->getSExtValue();
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
+ AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
+ AM.Scale = Scale;
+
+ // If load[imm+r*scale] is illegal, bail out.
+ if (!TLI->isLegalAddressingMode(AM, AccessTy))
+ return false;
+ }
+ return true;
+}
+
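ValidOffset parallels ValidScale: for each user it folds the candidate offset into the user's immediate and asks the target whether the resulting base + displacement + reg * scale form is legal. A hypothetical standalone sketch of that loop; the AddrMode struct and the legality predicate here are stand-ins, not the TargetLowering API:

#include <cstdint>
#include <vector>

struct AddrMode { long long BaseOffs = 0; bool HasBaseReg = false; long Scale = 0; };

// Stand-in for TargetLowering::isLegalAddressingMode; a real target would
// apply its actual addressing-mode limits here.
static bool isLegalAddrMode(const AddrMode &AM) {
  bool ScaleOK = AM.Scale == 0 || AM.Scale == 1 || AM.Scale == 2 ||
                 AM.Scale == 4 || AM.Scale == 8;
  bool OffsOK = AM.BaseOffs >= INT32_MIN && AM.BaseOffs <= INT32_MAX;
  return ScaleOK && OffsOK;
}

static bool validOffset(long long Offset, long Scale, bool HasBaseReg,
                        const std::vector<long long> &UserImms) {
  for (long long Imm : UserImms) {
    AddrMode AM;
    AM.BaseOffs = Imm + Offset;   // fold the reuse offset into the immediate
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    if (!isLegalAddrMode(AM))
      return false;               // a single illegal user rejects the offset
  }
  return true;
}

int main() {
  std::vector<long long> Imms = {0, 8, 16};       // made-up user immediates
  return validOffset(-32, 2, true, Imms) ? 0 : 1; // e.g. reuse offset -32
}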
/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
/// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
@@ -1196,10 +1004,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
- ++NewStride) {
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
- IVsByStride.find(StrideOrder[NewStride]);
+ IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
continue;
@@ -1215,24 +1023,44 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
- ValidScale(HasBaseReg, Scale, UsersToProcess)))
+ ValidScale(HasBaseReg, Scale, UsersToProcess))) {
+ // Prefer to reuse an IV with a base of zero.
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
- // FIXME: Only handle base == 0 for now.
- // Only reuse previous IV if it would not require a type conversion.
+ // Only reuse previous IV if it would not require a type conversion
+ // and if the base difference can be folded.
if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return SE->getIntegerSCEV(Scale, Stride->getType());
}
+ // Otherwise, settle for an IV with a foldable base.
+ if (AllUsesAreAddresses)
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // Only reuse previous IV if it would not require a type conversion
+ // and if the base difference can be folded.
+ if (SE->getEffectiveSCEVType(II->Base->getType()) ==
+ SE->getEffectiveSCEVType(Ty) &&
+ isa<SCEVConstant>(II->Base)) {
+ int64_t Base =
+ cast<SCEVConstant>(II->Base)->getValue()->getSExtValue();
+ if (Base > INT32_MIN && Base <= INT32_MAX &&
+ ValidOffset(HasBaseReg, -Base * Scale,
+ Scale, UsersToProcess)) {
+ IV = *II;
+ return SE->getIntegerSCEV(Scale, Stride->getType());
+ }
+ }
+ }
}
} else if (AllUsesAreOutsideLoop) {
// Accept nonconstant strides here; it is really really right to substitute
// an existing IV if we can.
- for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
- ++NewStride) {
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
- IVsByStride.find(StrideOrder[NewStride]);
+ IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
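The fallback added a few lines above (settling for an existing IV with a constant, foldable base when no zero-base IV is available) is easiest to see with concrete numbers. A standalone check of the arithmetic, with made-up values:

#include <cassert>

int main() {
  // Hypothetical reuse: an existing IV has constant base 16 and stride 4,
  // and a use needs stride 8, i.e. Scale = 2; rewriting the use against the
  // existing IV subtracts Base * Scale (-32), which must be a foldable
  // addressing-mode offset (what ValidOffset checks).
  const long Base = 16, Stride = 4, Scale = 2;
  long iv = Base;
  for (long i = 0; i < 8; ++i, iv += Stride) {
    long Wanted    = i * Stride * Scale;        // 0, 8, 16, 24, ...
    long Rewritten = iv * Scale - Base * Scale; // scaled existing IV + offset
    assert(Wanted == Rewritten);
  }
  return 0;
}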
@@ -1249,10 +1077,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
}
// Special case, old IV is -1*x and this one is x. Can treat this one as
// -1*old.
- for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
- ++NewStride) {
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
- IVsByStride.find(StrideOrder[NewStride]);
+ IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
@@ -1303,10 +1131,15 @@ SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
+ // FIXME: Generalize to non-affine IV's.
+ if (!Stride->isLoopInvariant(L))
+ return SE->getIntegerSCEV(0, Stride->getType());
+
UsersToProcess.reserve(Uses.Users.size());
- for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
- UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
-
+ for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
+ E = Uses.Users.end(); I != E; ++I) {
+ UsersToProcess.push_back(BasedUser(*I, SE));
+
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
@@ -1404,7 +1237,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
// TODO: For now, don't do full strength reduction if there could
// potentially be greater-stride multiples of the current stride
// which could reuse the current stride IV.
- if (StrideOrder.back() != Stride)
+ if (IU->StrideOrder.back() != Stride)
return false;
// Iterate through the uses to find conditions that automatically rule out
@@ -1853,8 +1686,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
- if (SE->getTypeSizeInBits(RewriteOp->getType()) !=
- SE->getTypeSizeInBits(ReplacedTy)) {
+ if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
+ SE->getEffectiveSCEVType(ReplacedTy)) {
assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
SE->getTypeSizeInBits(ReplacedTy) &&
"Unexpected widening cast!");
@@ -1884,8 +1717,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
// it here.
if (!ReuseIV.Base->isZero()) {
SCEVHandle typedBase = ReuseIV.Base;
- if (SE->getTypeSizeInBits(RewriteExpr->getType()) !=
- SE->getTypeSizeInBits(ReuseIV.Base->getType())) {
+ if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
+ SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
// It's possible the original IV is a larger type than the new IV,
// in which case we have to truncate the Base. We checked in
// RequiresTypeConversion that this is valid.
@@ -1929,7 +1762,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
- DeadInsts.push_back(cast<Instruction>(User.OperandValToReplace));
+ DeadInsts.push_back(User.OperandValToReplace);
UsersToProcess.pop_back();
++NumReduced;
@@ -1949,19 +1782,19 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride) {
- for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
- ++Stride) {
- std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
- IVUsesByStride.find(StrideOrder[Stride]);
- assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
-
- for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
- E = SI->second.Users.end(); UI != E; ++UI)
- if (UI->User == Cond) {
+ for (unsigned Stride = 0, e = IU->StrideOrder.size();
+ Stride != e && !CondUse; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
- CondUse = &*UI;
+ CondUse = UI;
CondStride = &SI->first;
return true;
}
@@ -2022,9 +1855,18 @@ namespace {
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride) {
- if (StrideOrder.size() < 2 ||
- IVUsesByStride[*CondStride].Users.size() != 1)
+ // If there's only one stride in the loop, there's nothing to do here.
+ if (IU->StrideOrder.size() < 2)
+ return Cond;
+ // If there are other users of the condition's stride, don't bother
+ // trying to change the condition because the stride will still
+ // remain.
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator I =
+ IU->IVUsesByStride.find(*CondStride);
+ if (I == IU->IVUsesByStride.end() ||
+ I->second->Users.size() != 1)
return Cond;
+ // Only handle constant strides for now.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
if (!SC) return Cond;
@@ -2051,9 +1893,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
// Look for a suitable stride / iv as replacement.
- for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
- std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
- IVUsesByStride.find(StrideOrder[i]);
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
@@ -2069,6 +1911,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// Check for overflow.
if (!Mul.isSignedIntN(BitWidth))
continue;
+ // Check for overflow in the stride's type too.
+ if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
+ continue;
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
@@ -2079,9 +1924,27 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
- for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
- E = SI->second.Users.end(); UI != E; ++UI) {
- NewCmpLHS = UI->OperandValToReplace;
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+
+ // If the IVStrideUse implies a cast, check for an actual cast which
+ // can be used to find the original IV expression.
+ if (SE->getEffectiveSCEVType(Op->getType()) !=
+ SE->getEffectiveSCEVType(SI->first->getType())) {
+ CastInst *CI = dyn_cast<CastInst>(Op);
+ // If it's not a simple cast, it's complicated.
+ if (!CI)
+ continue;
+ // If it's a cast from a type other than the stride type,
+ // it's complicated.
+ if (CI->getOperand(0)->getType() != SI->first->getType())
+ continue;
+ // Ok, we found the IV expression in the stride's type.
+ Op = CI->getOperand(0);
+ }
+
+ NewCmpLHS = Op;
if (NewCmpLHS->getType() == CmpTy)
break;
}
@@ -2105,13 +1968,13 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
- if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset))
+ if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
- SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
+ SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
@@ -2127,7 +1990,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &StrideOrder[i];
+ NewStride = &IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
@@ -2135,10 +1998,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
- ? SE->getMulExpr(CondUse->Offset,
+ ? SE->getMulExpr(CondUse->getOffset(),
SE->getConstant(ConstantInt::get(CmpTy, Scale)))
: SE->getConstant(ConstantInt::get(NewCmpIntTy,
- cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
+ cast<SCEVConstant>(CondUse->getOffset())->getValue()
+ ->getSExtValue()*Scale));
break;
}
}
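The intent of ChangeCompareStride shows up most clearly with concrete (made-up) numbers: a loop carries IVs of stride 1 and stride 4, and the stride-1 IV exists only to feed the exit compare. Rewriting the compare against the stride-4 IV scales the bound by 4, after which the stride-1 IV can die; the scaled bound has to fit both the compare type and the stride type, which is what the overflow checks guard. A standalone sanity check of the rewrite:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t CmpVal = 100, OldStride = 1, NewStride = 4;
  const int64_t Scale = NewStride / OldStride;   // 4
  const int64_t NewCmpVal = CmpVal * Scale;      // 400; must not overflow
  for (int64_t i1 = 0, i4 = 0; ; i1 += OldStride, i4 += NewStride) {
    bool OldExit = !(i1 < CmpVal);     // original test on the stride-1 IV
    bool NewExit = !(i4 < NewCmpVal);  // rewritten test on the stride-4 IV
    assert(OldExit == NewExit);        // both fire on the same iteration
    if (NewExit) break;
  }
  return 0;
}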
@@ -2165,13 +2029,12 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
- DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace));
+ DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IVUsesByStride[*CondStride].Users.pop_back();
- IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IVUsesByStride[*NewStride].Users.back();
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
@@ -2287,12 +2150,12 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
- Cond->eraseFromParent();
+ CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
- CondUse->User = NewCond;
return NewCond;
}
@@ -2304,19 +2167,19 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
- for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
- std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
- IVUsesByStride.find(StrideOrder[Stride]);
- assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
continue;
- for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
- E = SI->second.Users.end(); UI != E; /* empty */) {
- std::vector<IVStrideUse>::iterator CandidateUI = UI;
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; /* empty */) {
+ ilist<IVStrideUse>::iterator CandidateUI = UI;
++UI;
- Instruction *ShadowUse = CandidateUI->User;
+ Instruction *ShadowUse = CandidateUI->getUser();
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
@@ -2331,9 +2194,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User))
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User))
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
DestTy = SCast->getDestTy();
if (!DestTy) continue;
@@ -2400,7 +2263,6 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
- SI->second.Users.erase(CandidateUI);
NumShadow++;
break;
}
@@ -2450,11 +2312,12 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// transform the icmp to use post-inc iv. Otherwise do so only if it would
// not reuse another iv and its iv would be reused by other uses. We are
// optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride];
- for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) {
- if (StrideUses.Users[i].User == Cond)
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
continue;
- if (!StrideUses.Users[i].isUseOfPostIncrementedValue)
+ if (!I->isUseOfPostIncrementedValue())
return;
}
@@ -2463,10 +2326,10 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// StrengthReduceStridedIVUsers?
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee;
+ for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
++NewStride) {
- std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
- IVUsesByStride.find(StrideOrder[NewStride]);
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
continue;
int64_t SSInt =
@@ -2479,7 +2342,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
- SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
+ SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
@@ -2518,17 +2381,18 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
LatchBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
- CondUse->OperandValToReplace);
- CondUse = &IVUsesByStride[*CondStride].Users.back();
+ IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace(),
+ false);
+ CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
- CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
- CondUse->isUseOfPostIncrementedValue = true;
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
Changed = true;
++NumLoopCond;
@@ -2644,19 +2508,13 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+ IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
- // Find all uses of induction variables in this loop, and categorize
- // them by stride. Start by finding all of the PHI nodes in the header for
- // this loop. If they are induction variables, inspect their uses.
- SmallPtrSet<Instruction*,16> Processed; // Don't reprocess instructions.
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- AddUsersIfInteresting(I, L, Processed);
-
- if (!IVUsesByStride.empty()) {
+ if (!IU->IVUsesByStride.empty()) {
#ifndef NDEBUG
DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
<< "\" ";
@@ -2664,7 +2522,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
#endif
// Sort the StrideOrder so we process larger strides first.
- std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare(SE));
+ std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
+ StrideCompare(SE));
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
@@ -2695,11 +2554,15 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
- std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
- IVUsesByStride.find(StrideOrder[Stride]);
- assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
- StrengthReduceStridedIVUsers(SI->first, SI->second, L);
+ for (unsigned Stride = 0, e = IU->StrideOrder.size();
+ Stride != e; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
}
}
@@ -2708,9 +2571,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
OptimizeLoopCountIV(L);
// We're done analyzing this loop; release all the state we built up for it.
- IVUsesByStride.clear();
IVsByStride.clear();
- StrideOrder.clear();
StrideNoReuse.clear();
// Clean up after ourselves
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index d11b025..275feba 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,10 +1,11 @@
; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
; RUN: grep inc %t | count 1
; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 13
-; RUN: grep leaq %t | count 8
-; RUN: grep leal %t | count 4
-; RUN: grep movq %t | count 5
+; RUN: grep addq %t | count 8
+; RUN: grep addb %t | count 2
+; RUN: grep leaq %t | count 12
+; RUN: grep leal %t | count 2
+; RUN: grep movq %t | count 4
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index e9b80a4..e102535 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -3,14 +3,13 @@
; RUN: not grep movz %t
; RUN: not grep sar %t
; RUN: not grep shl %t
-; RUN: grep add %t | count 6
-; RUN: grep inc %t | count 2
-; RUN: grep dec %t | count 4
+; RUN: grep add %t | count 2
+; RUN: grep inc %t | count 4
+; RUN: grep dec %t | count 2
; RUN: grep lea %t | count 2
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
-; Also, the loop-reversal algorithm kicks in twice.
define void @count_up(double* %d, i64 %n) nounwind {
entry:
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll
index eee751a..81b262a 100644
--- a/test/CodeGen/X86/subreg-to-reg-5.ll
+++ b/test/CodeGen/X86/subreg-to-reg-5.ll
@@ -8,7 +8,8 @@ entry:
bb2: ; preds = %bb3, %entry
%B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2]
- br i1 false, label %bb3, label %bb4
+ %z = icmp slt i64 %B_addr.0.rec, 20000
+ br i1 %z, label %bb3, label %bb4
bb3: ; preds = %bb2
%indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
@@ -17,7 +18,7 @@ bb3: ; preds = %bb2
bb4: ; preds = %bb2
%B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=1]
%t1 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
- %t2 = and i64 %t1, 15 ; <i64> [#uses=1]
+ %t2 = and i64 %t1, 4294967295 ; <i64> [#uses=1]
%t3 = icmp eq i64 %t2, 0 ; <i1> [#uses=1]
br i1 %t3, label %bb5, label %bb10.preheader
@@ -25,7 +26,7 @@ bb10.preheader: ; preds = %bb4
br label %bb9
bb5: ; preds = %bb4
- unreachable
+ ret float 7.0
bb9: ; preds = %bb10.preheader
%t5 = getelementptr float* %B, i64 0 ; <float*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
index 5cc595e..4d26803 100644
--- a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {sext}
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {zext}
+; RUN: llvm-as < %s | opt -indvars -instcombine | llvm-dis | not grep {\[sz\]ext}
; ModuleID = '<stdin>'
;extern int *a, *b, *c, *d, *e, *f; /* 64 bit */
;extern int K[256];
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
new file mode 100644
index 0000000..56325b3
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -0,0 +1,90 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: grep phi %t | count 4
+; RUN: grep {= phi i32} %t | count 4
+; RUN: not grep {sext i} %t
+; RUN: not grep {zext i} %t
+; RUN: not grep {trunc i} %t
+; RUN: not grep {add i8} %t
+; PR1301
+
+; Do a bunch of analysis and prove that the loops can use an i32 trip
+; count without casting.
+
+; ModuleID = 'ada.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+
+define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind {
+bb.thread:
+ %tmp46 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp46
+ br label %bb
+
+bb: ; preds = %bb, %bb.thread
+ %i.0.reg2mem.0 = phi i8 [ -128, %bb.thread ], [ %tmp8, %bb ] ; <i8> [#uses=1]
+ %tmp8 = add i8 %i.0.reg2mem.0, 1 ; <i8> [#uses=3]
+ %tmp1 = sext i8 %tmp8 to i32 ; <i32> [#uses=1]
+ %tmp3 = add i32 %tmp1, 128 ; <i32> [#uses=1]
+ %tmp4 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp3 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp4
+ %0 = icmp eq i8 %tmp8, 127 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @kinds__ubytezero([256 x i32]* nocapture %a) nounwind {
+bb.thread:
+ %tmp35 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp35
+ br label %bb
+
+bb: ; preds = %bb, %bb.thread
+ %i.0.reg2mem.0 = phi i8 [ 0, %bb.thread ], [ %tmp7, %bb ] ; <i8> [#uses=1]
+ %tmp7 = add i8 %i.0.reg2mem.0, 1 ; <i8> [#uses=3]
+ %tmp1 = zext i8 %tmp7 to i32 ; <i32> [#uses=1]
+ %tmp3 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp1 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp3
+ %0 = icmp eq i8 %tmp7, -1 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @kinds__srangezero([21 x i32]* nocapture %a) nounwind {
+bb.thread:
+ br label %bb
+
+bb: ; preds = %bb, %bb.thread
+ %i.0.reg2mem.0 = phi i8 [ -10, %bb.thread ], [ %tmp7, %bb ] ; <i8> [#uses=2]
+ %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; <i32> [#uses=1]
+ %tmp4 = add i32 %tmp12, 10 ; <i32> [#uses=1]
+ %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp5
+ %tmp7 = add i8 %i.0.reg2mem.0, 1 ; <i8> [#uses=2]
+ %0 = icmp sgt i8 %tmp7, 10 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @kinds__urangezero([21 x i32]* nocapture %a) nounwind {
+bb.thread:
+ br label %bb
+
+bb: ; preds = %bb, %bb.thread
+ %i.0.reg2mem.0 = phi i8 [ 10, %bb.thread ], [ %tmp7, %bb ] ; <i8> [#uses=2]
+ %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; <i32> [#uses=1]
+ %tmp4 = add i32 %tmp12, -10 ; <i32> [#uses=1]
+ %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp5
+ %tmp7 = add i8 %i.0.reg2mem.0, 1 ; <i8> [#uses=2]
+ %0 = icmp sgt i8 %tmp7, 30 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
new file mode 100644
index 0000000..76d48de
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: not grep and %t
+; RUN: not grep zext %t
+
+target datalayout = "-p:64:64:64"
+
+define void @foo(double* %d, i64 %n) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.i8 = and i64 %indvar, 255
+ %t0 = getelementptr double* %d, i64 %indvar.i8
+ %t1 = load double* %t0
+ %t2 = mul double %t1, 0.1
+ store double %t2, double* %t0
+ %indvar.i24 = and i64 %indvar, 16777215
+ %t3 = getelementptr double* %d, i64 %indvar.i24
+ %t4 = load double* %t3
+ %t5 = mul double %t4, 2.3
+ store double %t5, double* %t3
+ %t6 = getelementptr double* %d, i64 %indvar
+ %t7 = load double* %t6
+ %t8 = mul double %t7, 4.5
+ store double %t8, double* %t6
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
new file mode 100644
index 0000000..35fbf52
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as < %s | opt -indvars -loop-deletion | llvm-dis | grep phi | count 1
+
+; Indvars should be able to evaluate this loop, allowing loop deletion
+; to delete it.
+
+define i32 @test(i32 %x_offs) nounwind readnone {
+entry:
+ %0 = icmp sgt i32 %x_offs, 4 ; <i1> [#uses=1]
+ br i1 %0, label %bb.nph, label %bb2
+
+bb.nph: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb1, %bb.nph
+ %x_offs_addr.01 = phi i32 [ %1, %bb1 ], [ %x_offs, %bb.nph ] ; <i32> [#uses=1]
+ %1 = add i32 %x_offs_addr.01, -4 ; <i32> [#uses=3]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %2 = icmp sgt i32 %1, 4 ; <i1> [#uses=1]
+ br i1 %2, label %bb, label %bb1.bb2_crit_edge
+
+bb1.bb2_crit_edge: ; preds = %bb1
+ br label %bb2
+
+bb2: ; preds = %bb1.bb2_crit_edge, %entry
+ %x_offs_addr.0.lcssa = phi i32 [ %1, %bb1.bb2_crit_edge ], [ %x_offs, %entry ] ; <i32> [#uses=1]
+ ret i32 %x_offs_addr.0.lcssa
+}
diff --git a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
index 153a181..f873b3d 100644
--- a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 3
+; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep {mul.*%lsr.iv} | count 2
; The multiply in bb2 must not be reduced to an add, as the sext causes the
; %1 argument to become negative after a while.
; ModuleID = '<stdin>'