diff options
author | Dan Gohman <gohman@apple.com> | 2009-05-12 02:17:14 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2009-05-12 02:17:14 +0000 |
commit | 28055122535b28133c1dbadd920e2980fcb84431 (patch) | |
tree | 3f5da42b9778500f8db915f31c8003baa064f862 /lib/Transforms | |
parent | 9fd4a00298936c4ab3bf51389147467cb29e025a (diff) | |
download | external_llvm-28055122535b28133c1dbadd920e2980fcb84431.zip external_llvm-28055122535b28133c1dbadd920e2980fcb84431.tar.gz external_llvm-28055122535b28133c1dbadd920e2980fcb84431.tar.bz2 |
Factor the code for collecting IV users out of LSR into an IVUsers class,
and generalize it so that it can be used by IndVarSimplify. Implement the
base IndVarSimplify transformation code using IVUsers. This removes
TestOrigIVForWrap and associated code, as ScalarEvolution now has enough
builtin overflow detection and folding logic to handle all the same cases,
and more. Run "opt -iv-users -analyze -disable-output" on your favorite
loop for an example of what IVUsers does.
This lets IndVarSimplify eliminate IV casts and compute trip counts in
more cases. Also, this happens to finally fix the remaining testcases
in PR1301.
Now that IndVarSimplify is being more aggressive, it occasionally runs
into the problem where ScalarEvolutionExpander's code for avoiding
duplicate expansions makes it difficult to ensure that all expanded
instructions dominate all the instructions that will use them. As a
temporary measure, IndVarSimplify now uses a FixUsesBeforeDefs function
to fix up instructions inserted by SCEVExpander. Fortunately, this code
is contained, and can be easily removed once a more comprehensive
solution is available.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@71535 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 935 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 569 |
2 files changed, 584 insertions, 920 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3d9017d..80d34f6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -43,6 +43,8 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -51,11 +53,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); @@ -65,6 +68,7 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced"); namespace { class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { + IVUsers *IU; LoopInfo *LI; ScalarEvolution *SE; bool Changed; @@ -76,12 +80,15 @@ namespace { virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LCSSAID); AU.addRequiredID(LoopSimplifyID); AU.addRequired<LoopInfo>(); + AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<IVUsers>(); AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -90,17 +97,21 @@ namespace { void RewriteNonIntegerIVs(Loop *L); - void LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, + ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount); - void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts); + void RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter); - void HandleFloatingPointIV(Loop *L, PHINode *PH, - SmallPtrSet<Instruction*, 16> &DeadInsts); + void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter); + + void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter); + + void HandleFloatingPointIV(Loop *L, PHINode *PH); }; } @@ -112,31 +123,12 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void IndVarSimplify:: -DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts) { - while (!Insts.empty()) { - Instruction *I = *Insts.begin(); - Insts.erase(I); - if (isInstructionTriviallyDead(I)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i))) - Insts.insert(U); - DOUT << "INDVARS: Deleting: " << *I; - I->eraseFromParent(); - Changed = true; - } - } -} - /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. -void IndVarSimplify::LinearFunctionTestReplace(Loop *L, +ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, @@ -196,10 +188,15 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" << " RHS:\t" << *RHS << "\n"; - Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); - BI->setCondition(Cond); + ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); + + Instruction *OrigCond = cast<Instruction>(BI->getCondition()); + OrigCond->replaceAllUsesWith(Cond); + RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + ++NumLFTR; Changed = true; + return Cond; } /// RewriteLoopExitValues - Check to see if this loop has a computable @@ -207,8 +204,16 @@ void IndVarSimplify::LinearFunctionTestReplace(Loop *L, /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. +/// +/// This is mostly redundant with the regular IndVarSimplify activities that +/// happen later, except that it's more powerful in some cases, because it's +/// able to brute-force evaluate arbitrary instructions as long as they have +/// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount) { + // Verify the input to the pass in already in LCSSA form. + assert(L->isLCSSAForm()); + BasicBlock *Preheader = L->getLoopPreheader(); // Scan all of the instructions in the loop, looking at those that have @@ -226,9 +231,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, BlockToInsertInto = Preheader; BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI(); - bool HasConstantItCount = isa<SCEVConstant>(BackedgeTakenCount); - - SmallPtrSet<Instruction*, 16> InstructionsToDelete; std::map<Instruction*, Value*> ExitValues; // Find all values that are computed inside the loop, but used outside of it. @@ -268,18 +270,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, if (!L->contains(Inst->getParent())) continue; - // We require that this value either have a computable evolution or that - // the loop have a constant iteration count. In the case where the loop - // has a constant iteration count, we can sometimes force evaluation of - // the exit value through brute force. - SCEVHandle SH = SE->getSCEV(Inst); - if (!SH->hasComputableLoopEvolution(L) && !HasConstantItCount) - continue; // Cannot get exit evolution for the loop value. - // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. - SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + SCEVHandle SH = SE->getSCEV(Inst); + SCEVHandle ExitValue = SE->getSCEVAtScope(SH, L->getParentLoop()); if (isa<SCEVCouldNotCompute>(ExitValue) || !ExitValue->isLoopInvariant(L)) continue; @@ -298,9 +293,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, PN->setIncomingValue(i, ExitVal); - // If this instruction is dead now, schedule it to be removed. - if (Inst->use_empty()) - InstructionsToDelete.insert(Inst); + // If this instruction is dead now, delete it. + RecursivelyDeleteTriviallyDeadInstructions(Inst); // See if this is a single-entry LCSSA PHI node. If so, we can (and // have to) remove @@ -308,14 +302,12 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // in the loop, so we don't need an LCSSA phi node anymore. if (NumPreds == 1) { PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); + RecursivelyDeleteTriviallyDeadInstructions(PN); break; } } } } - - DeleteTriviallyDeadInstructions(InstructionsToDelete); } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { @@ -325,266 +317,24 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallPtrSet<Instruction*, 16> DeadInsts; - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - HandleFloatingPointIV(L, PN, DeadInsts); - } + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i])) + HandleFloatingPointIV(L, PN); // If the loop previously had floating-point IV, ScalarEvolution // may not have been able to compute a trip count. Now that we've done some // re-writing, the trip count may be computable. if (Changed) SE->forgetLoopBackedgeTakenCount(L); - - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(DeadInsts); -} - -/// getEffectiveIndvarType - Determine the widest type that the -/// induction-variable PHINode Phi is cast to. -/// -static const Type *getEffectiveIndvarType(const PHINode *Phi, - const ScalarEvolution *SE) { - const Type *Ty = Phi->getType(); - - for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end(); - UI != UE; ++UI) { - const Type *CandidateType = NULL; - if (const ZExtInst *ZI = dyn_cast<ZExtInst>(UI)) - CandidateType = ZI->getDestTy(); - else if (const SExtInst *SI = dyn_cast<SExtInst>(UI)) - CandidateType = SI->getDestTy(); - else if (const IntToPtrInst *IP = dyn_cast<IntToPtrInst>(UI)) - CandidateType = IP->getDestTy(); - else if (const PtrToIntInst *PI = dyn_cast<PtrToIntInst>(UI)) - CandidateType = PI->getDestTy(); - if (CandidateType && - SE->isSCEVable(CandidateType) && - SE->getTypeSizeInBits(CandidateType) > SE->getTypeSizeInBits(Ty)) - Ty = CandidateType; - } - - return Ty; -} - -/// TestOrigIVForWrap - Analyze the original induction variable that -/// controls the loop's iteration to determine whether it would ever -/// undergo signed or unsigned overflow. -/// -/// In addition to setting the NoSignedWrap and NoUnsignedWrap -/// variables to true when appropriate (they are not set to false here), -/// return the PHI for this induction variable. Also record the initial -/// and final values and the increment; these are not meaningful unless -/// either NoSignedWrap or NoUnsignedWrap is true, and are always meaningful -/// in that case, although the final value may be 0 indicating a nonconstant. -/// -/// TODO: This duplicates a fair amount of ScalarEvolution logic. -/// Perhaps this can be merged with -/// ScalarEvolution::getBackedgeTakenCount -/// and/or ScalarEvolution::get{Sign,Zero}ExtendExpr. -/// -static const PHINode *TestOrigIVForWrap(const Loop *L, - const BranchInst *BI, - const Instruction *OrigCond, - const ScalarEvolution &SE, - bool &NoSignedWrap, - bool &NoUnsignedWrap, - const ConstantInt* &InitialVal, - const ConstantInt* &IncrVal, - const ConstantInt* &LimitVal) { - // Verify that the loop is sane and find the exit condition. - const ICmpInst *Cmp = dyn_cast<ICmpInst>(OrigCond); - if (!Cmp) return 0; - - const Value *CmpLHS = Cmp->getOperand(0); - const Value *CmpRHS = Cmp->getOperand(1); - const BasicBlock *TrueBB = BI->getSuccessor(0); - const BasicBlock *FalseBB = BI->getSuccessor(1); - ICmpInst::Predicate Pred = Cmp->getPredicate(); - - // Canonicalize a constant to the RHS. - if (isa<ConstantInt>(CmpLHS)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - std::swap(CmpLHS, CmpRHS); - } - // Canonicalize SLE to SLT. - if (Pred == ICmpInst::ICMP_SLE) - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) - if (!CI->getValue().isMaxSignedValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_SLT; - } - // Canonicalize SGT to SGE. - if (Pred == ICmpInst::ICMP_SGT) - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) - if (!CI->getValue().isMaxSignedValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_SGE; - } - // Canonicalize SGE to SLT. - if (Pred == ICmpInst::ICMP_SGE) { - std::swap(TrueBB, FalseBB); - Pred = ICmpInst::ICMP_SLT; - } - // Canonicalize ULE to ULT. - if (Pred == ICmpInst::ICMP_ULE) - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) - if (!CI->getValue().isMaxValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_ULT; - } - // Canonicalize UGT to UGE. - if (Pred == ICmpInst::ICMP_UGT) - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) - if (!CI->getValue().isMaxValue()) { - CmpRHS = ConstantInt::get(CI->getValue() + 1); - Pred = ICmpInst::ICMP_UGE; - } - // Canonicalize UGE to ULT. - if (Pred == ICmpInst::ICMP_UGE) { - std::swap(TrueBB, FalseBB); - Pred = ICmpInst::ICMP_ULT; - } - // For now, analyze only LT loops for signed overflow. - if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_ULT) - return 0; - - bool isSigned = Pred == ICmpInst::ICMP_SLT; - - // Get the increment instruction. Look past casts if we will - // be able to prove that the original induction variable doesn't - // undergo signed or unsigned overflow, respectively. - const Value *IncrInst = CmpLHS; - if (isSigned) { - if (const SExtInst *SI = dyn_cast<SExtInst>(CmpLHS)) { - if (!isa<ConstantInt>(CmpRHS) || - !cast<ConstantInt>(CmpRHS)->getValue() - .isSignedIntN(SE.getTypeSizeInBits(IncrInst->getType()))) - return 0; - IncrInst = SI->getOperand(0); - } - } else { - if (const ZExtInst *ZI = dyn_cast<ZExtInst>(CmpLHS)) { - if (!isa<ConstantInt>(CmpRHS) || - !cast<ConstantInt>(CmpRHS)->getValue() - .isIntN(SE.getTypeSizeInBits(IncrInst->getType()))) - return 0; - IncrInst = ZI->getOperand(0); - } - } - - // For now, only analyze induction variables that have simple increments. - const BinaryOperator *IncrOp = dyn_cast<BinaryOperator>(IncrInst); - if (!IncrOp || IncrOp->getOpcode() != Instruction::Add) - return 0; - IncrVal = dyn_cast<ConstantInt>(IncrOp->getOperand(1)); - if (!IncrVal) - return 0; - - // Make sure the PHI looks like a normal IV. - const PHINode *PN = dyn_cast<PHINode>(IncrOp->getOperand(0)); - if (!PN || PN->getNumIncomingValues() != 2) - return 0; - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = !IncomingEdge; - if (!L->contains(PN->getIncomingBlock(BackEdge)) || - PN->getIncomingValue(BackEdge) != IncrOp) - return 0; - if (!L->contains(TrueBB)) - return 0; - - // For now, only analyze loops with a constant start value, so that - // we can easily determine if the start value is not a maximum value - // which would wrap on the first iteration. - InitialVal = dyn_cast<ConstantInt>(PN->getIncomingValue(IncomingEdge)); - if (!InitialVal) - return 0; - - // The upper limit need not be a constant; we'll check later. - LimitVal = dyn_cast<ConstantInt>(CmpRHS); - - // We detect the impossibility of wrapping in two cases, both of - // which require starting with a non-max value: - // - The IV counts up by one, and the loop iterates only while it remains - // less than a limiting value (any) in the same type. - // - The IV counts up by a positive increment other than 1, and the - // constant limiting value + the increment is less than the max value - // (computed as max-increment to avoid overflow) - if (isSigned && !InitialVal->getValue().isMaxSignedValue()) { - if (IncrVal->equalsInt(1)) - NoSignedWrap = true; // LimitVal need not be constant - else if (LimitVal) { - uint64_t numBits = LimitVal->getValue().getBitWidth(); - if (IncrVal->getValue().sgt(APInt::getNullValue(numBits)) && - (APInt::getSignedMaxValue(numBits) - IncrVal->getValue()) - .sgt(LimitVal->getValue())) - NoSignedWrap = true; - } - } else if (!isSigned && !InitialVal->getValue().isMaxValue()) { - if (IncrVal->equalsInt(1)) - NoUnsignedWrap = true; // LimitVal need not be constant - else if (LimitVal) { - uint64_t numBits = LimitVal->getValue().getBitWidth(); - if (IncrVal->getValue().ugt(APInt::getNullValue(numBits)) && - (APInt::getMaxValue(numBits) - IncrVal->getValue()) - .ugt(LimitVal->getValue())) - NoUnsignedWrap = true; - } - } - return PN; -} - -static Value *getSignExtendedTruncVar(const SCEVAddRecExpr *AR, - ScalarEvolution *SE, - const Type *LargestType, Loop *L, - const Type *myType, - SCEVExpander &Rewriter) { - SCEVHandle ExtendedStart = - SE->getSignExtendExpr(AR->getStart(), LargestType); - SCEVHandle ExtendedStep = - SE->getSignExtendExpr(AR->getStepRecurrence(*SE), LargestType); - SCEVHandle ExtendedAddRec = - SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); - if (LargestType != myType) - ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); - return Rewriter.expandCodeFor(ExtendedAddRec, myType); -} - -static Value *getZeroExtendedTruncVar(const SCEVAddRecExpr *AR, - ScalarEvolution *SE, - const Type *LargestType, Loop *L, - const Type *myType, - SCEVExpander &Rewriter) { - SCEVHandle ExtendedStart = - SE->getZeroExtendExpr(AR->getStart(), LargestType); - SCEVHandle ExtendedStep = - SE->getZeroExtendExpr(AR->getStepRecurrence(*SE), LargestType); - SCEVHandle ExtendedAddRec = - SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); - if (LargestType != myType) - ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); - return Rewriter.expandCodeFor(ExtendedAddRec, myType); -} - -/// allUsesAreSameTyped - See whether all Uses of I are instructions -/// with the same Opcode and the same type. -static bool allUsesAreSameTyped(unsigned int Opcode, Instruction *I) { - const Type* firstType = NULL; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Instruction *II = dyn_cast<Instruction>(*UI); - if (!II || II->getOpcode() != Opcode) - return false; - if (!firstType) - firstType = II->getType(); - else if (firstType != II->getType()) - return false; - } - return true; } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; @@ -594,11 +344,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteNonIntegerIVs(L); BasicBlock *Header = L->getHeader(); - BasicBlock *ExitingBlock = L->getExitingBlock(); - SmallPtrSet<Instruction*, 16> DeadInsts; - - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm()); + BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null + SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -606,59 +353,45 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop into any instructions outside of the loop that use the final values of // the current expressions. // - SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, BackedgeTakenCount); - // Next, analyze all of the induction variables in the loop, canonicalizing - // auxillary induction variables. - std::vector<std::pair<PHINode*, SCEVHandle> > IndVars; - - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - if (SE->isSCEVable(PN->getType())) { - SCEVHandle SCEV = SE->getSCEV(PN); - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SCEV)) - if (AR->getLoop() == L && AR->isAffine()) - IndVars.push_back(std::make_pair(PN, SCEV)); - } - } - - // Compute the type of the largest recurrence expression, and collect - // the set of the types of the other recurrence expressions. + // Compute the type of the largest recurrence expression, and decide whether + // a canonical induction variable should be inserted. const Type *LargestType = 0; - SmallSetVector<const Type *, 4> SizesToInsert; + bool NeedCannIV = false; if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { LargestType = BackedgeTakenCount->getType(); LargestType = SE->getEffectiveSCEVType(LargestType); - SizesToInsert.insert(LargestType); + // If we have a known trip count and a single exit block, we'll be + // rewriting the loop exit test condition below, which requires a + // canonical induction variable. + if (ExitingBlock) + NeedCannIV = true; } - for (unsigned i = 0, e = IndVars.size(); i != e; ++i) { - const PHINode *PN = IndVars[i].first; - const Type *PNTy = PN->getType(); - PNTy = SE->getEffectiveSCEVType(PNTy); - SizesToInsert.insert(PNTy); - const Type *EffTy = getEffectiveIndvarType(PN, SE); - EffTy = SE->getEffectiveSCEVType(EffTy); - SizesToInsert.insert(EffTy); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + SCEVHandle Stride = IU->StrideOrder[i]; + const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); if (!LargestType || - SE->getTypeSizeInBits(EffTy) > + SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) - LargestType = EffTy; + LargestType = Ty; + + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + + if (!SI->second->Users.empty()) + NeedCannIV = true; } // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, *LI); - // Now that we know the largest of of the induction variables in this loop, - // insert a canonical induction variable of the largest size. + // Now that we know the largest of of the induction variable expressions + // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; - if (!SizesToInsert.empty()) { + if (NeedCannIV) { IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType); ++NumInserted; Changed = true; @@ -667,229 +400,291 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - bool NoSignedWrap = false; - bool NoUnsignedWrap = false; - const ConstantInt* InitialVal, * IncrVal, * LimitVal; - const PHINode *OrigControllingPHI = 0; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) + ICmpInst *NewICmp = 0; + if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) { + assert(NeedCannIV && + "LinearFunctionTestReplace requires a canonical induction variable"); // Can't rewrite non-branch yet. - if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) { - if (Instruction *OrigCond = dyn_cast<Instruction>(BI->getCondition())) { - // Determine if the OrigIV will ever undergo overflow. - OrigControllingPHI = - TestOrigIVForWrap(L, BI, OrigCond, *SE, - NoSignedWrap, NoUnsignedWrap, - InitialVal, IncrVal, LimitVal); - - // We'll be replacing the original condition, so it'll be dead. - DeadInsts.insert(OrigCond); - } + if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) + NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + ExitingBlock, BI, Rewriter); + } - LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - ExitingBlock, BI, Rewriter); - } + Rewriter.setInsertionPoint(Header->getFirstNonPHI()); - // Now that we have a canonical induction variable, we can rewrite any - // recurrences in terms of the induction variable. Start with the auxillary - // induction variables, and recursively rewrite any of their uses. - BasicBlock::iterator InsertPt = Header->getFirstNonPHI(); - Rewriter.setInsertionPoint(InsertPt); - - // If there were induction variables of other sizes, cast the primary - // induction variable to the right size for them, avoiding the need for the - // code evaluation methods to insert induction variables of different sizes. - for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) { - const Type *Ty = SizesToInsert[i]; - if (Ty != LargestType) { - Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt); - Rewriter.addInsertedValue(New, SE->getSCEV(New)); - DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": " - << *New << "\n"; - } - } + // Rewrite IV-derived expressions. + RewriteIVExpressions(L, LargestType, Rewriter); - // Rewrite all induction variables in terms of the canonical induction - // variable. - while (!IndVars.empty()) { - PHINode *PN = IndVars.back().first; - const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(IndVars.back().second); - Value *NewVal = Rewriter.expandCodeFor(AR, PN->getType()); - DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *PN - << " into = " << *NewVal << "\n"; - NewVal->takeName(PN); - - /// If the new canonical induction variable is wider than the original, - /// and the original has uses that are casts to wider types, see if the - /// truncate and extend can be omitted. - if (PN == OrigControllingPHI && PN->getType() != LargestType) - for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); - UI != UE; ++UI) { - Instruction *UInst = dyn_cast<Instruction>(*UI); - if (UInst && isa<SExtInst>(UInst) && NoSignedWrap) { - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L, - UInst->getType(), Rewriter); - UInst->replaceAllUsesWith(TruncIndVar); - DeadInsts.insert(UInst); - } - // See if we can figure out sext(i+constant) doesn't wrap, so we can - // use a larger add. This is common in subscripting. - if (UInst && UInst->getOpcode()==Instruction::Add && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::SExt, UInst) && - isa<ConstantInt>(UInst->getOperand(1)) && - NoSignedWrap && LimitVal) { - uint64_t oldBitSize = LimitVal->getValue().getBitWidth(); - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AddRHS = dyn_cast<ConstantInt>(UInst->getOperand(1)); - if (((APInt::getSignedMaxValue(oldBitSize) - IncrVal->getValue()) - - AddRHS->getValue()).sgt(LimitVal->getValue())) { - // We've determined this is (i+constant) and it won't overflow. - if (isa<SExtInst>(UInst->use_begin())) { - SExtInst* oldSext = dyn_cast<SExtInst>(UInst->use_begin()); - uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldSext->getType(), Rewriter); - APInt APnewAddRHS = APInt(AddRHS->getValue()).sext(newBitSize); - if (newBitSize > truncSize) - APnewAddRHS = APnewAddRHS.trunc(truncSize); - ConstantInt* newAddRHS =ConstantInt::get(APnewAddRHS); - Value *NewAdd = - BinaryOperator::CreateAdd(TruncIndVar, newAddRHS, - UInst->getName()+".nosex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast<Instruction>(UI2); - II->replaceAllUsesWith(NewAdd); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - } - } - } - // Try for sext(i | constant). This is safe as long as the - // high bit of the constant is not set. - if (UInst && UInst->getOpcode()==Instruction::Or && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::SExt, UInst) && NoSignedWrap && - isa<ConstantInt>(UInst->getOperand(1))) { - ConstantInt* RHS = dyn_cast<ConstantInt>(UInst->getOperand(1)); - if (!RHS->getValue().isNegative()) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - SExtInst* oldSext = dyn_cast<SExtInst>(UInst->use_begin()); - uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldSext->getType(), Rewriter); - APInt APnewOrRHS = APInt(RHS->getValue()).sext(newBitSize); - if (newBitSize > truncSize) - APnewOrRHS = APnewOrRHS.trunc(truncSize); - ConstantInt* newOrRHS =ConstantInt::get(APnewOrRHS); - Value *NewOr = - BinaryOperator::CreateOr(TruncIndVar, newOrRHS, - UInst->getName()+".nosex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast<Instruction>(UI2); - II->replaceAllUsesWith(NewOr); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - } - } - // A zext of a signed variable known not to overflow is still safe. - if (UInst && isa<ZExtInst>(UInst) && (NoUnsignedWrap || NoSignedWrap)) { - Value *TruncIndVar = getZeroExtendedTruncVar(AR, SE, LargestType, L, - UInst->getType(), Rewriter); - UInst->replaceAllUsesWith(TruncIndVar); - DeadInsts.insert(UInst); - } - // If we have zext(i&constant), it's always safe to use the larger - // variable. This is not common but is a bottleneck in Openssl. - // (RHS doesn't have to be constant. There should be a better approach - // than bottom-up pattern matching for this...) - if (UInst && UInst->getOpcode()==Instruction::And && - !UInst->use_empty() && - allUsesAreSameTyped(Instruction::ZExt, UInst) && - isa<ConstantInt>(UInst->getOperand(1))) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AndRHS = dyn_cast<ConstantInt>(UInst->getOperand(1)); - ZExtInst* oldZext = dyn_cast<ZExtInst>(UInst->use_begin()); - uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldZext->getType(), Rewriter); - APInt APnewAndRHS = APInt(AndRHS->getValue()).zext(newBitSize); - if (newBitSize > truncSize) - APnewAndRHS = APnewAndRHS.trunc(truncSize); - ConstantInt* newAndRHS = ConstantInt::get(APnewAndRHS); - Value *NewAnd = - BinaryOperator::CreateAnd(TruncIndVar, newAndRHS, - UInst->getName()+".nozex", UInst); - for (Value::use_iterator UI2 = UInst->use_begin(), - UE2 = UInst->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast<Instruction>(UI2); - II->replaceAllUsesWith(NewAnd); - DeadInsts.insert(II); + // Loop-invariant instructions in the preheader that aren't used in the + // loop may be sunk below the loop to reduce register pressure. + SinkUnusedInvariants(L, Rewriter); + + // Reorder instructions to avoid use-before-def conditions. + FixUsesBeforeDefs(L, Rewriter); + + // For completeness, inform IVUsers of the IV use in the newly-created + // loop exit test instruction. + if (NewICmp) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); + + // Clean up dead instructions. + DeleteDeadPHIs(L->getHeader()); + // Check a post-condition. + assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!"); + return Changed; +} + +void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, + SCEVExpander &Rewriter) { + SmallVector<WeakVH, 16> DeadInsts; + + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. + // + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + SCEVHandle Stride = IU->StrideOrder[i]; + + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + ilist<IVStrideUse> &List = SI->second->Users; + for (ilist<IVStrideUse>::iterator UI = List.begin(), + E = List.end(); UI != E; ++UI) { + SCEVHandle Offset = UI->getOffset(); + Value *Op = UI->getOperandValToReplace(); + Instruction *User = UI->getUser(); + bool isSigned = UI->isSigned(); + + // Compute the final addrec to expand into code. + SCEVHandle AR = IU->getReplacementExpr(*UI); + + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!Stride->isLoopInvariant(L) && + !isa<SCEVConstant>(AR) && + L->contains(User->getParent())) + continue; + + Value *NewVal = 0; + if (AR->isLoopInvariant(L)) { + BasicBlock::iterator I = Rewriter.getInsertionPoint(); + // Expand loop-invariant values in the loop preheader. They will + // be sunk to the exit block later, if possible. + NewVal = + Rewriter.expandCodeFor(AR, LargestType, + L->getLoopPreheader()->getTerminator()); + Rewriter.setInsertionPoint(I); + ++NumReplaced; + } else { + const Type *IVTy = Offset->getType(); + const Type *UseTy = Op->getType(); + + // Promote the Offset and Stride up to the canonical induction + // variable's bit width. + SCEVHandle PromotedOffset = Offset; + SCEVHandle PromotedStride = Stride; + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) { + // It doesn't matter for correctness whether zero or sign extension + // is used here, since the value is truncated away below, but if the + // value is signed, sign extension is more likely to be folded. + if (isSigned) { + PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType); + PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType); + } else { + PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType); + // If the stride is obviously negative, use sign extension to + // produce things like x-1 instead of x+255. + if (isa<SCEVConstant>(PromotedStride) && + cast<SCEVConstant>(PromotedStride) + ->getValue()->getValue().isNegative()) + PromotedStride = SE->getSignExtendExpr(PromotedStride, + LargestType); + else + PromotedStride = SE->getZeroExtendExpr(PromotedStride, + LargestType); } - DeadInsts.insert(UInst); } - // If we have zext((i+constant)&constant), we can use the larger - // variable even if the add does overflow. This works whenever the - // constant being ANDed is the same size as i, which it presumably is. - // We don't need to restrict the expression being and'ed to i+const, - // but we have to promote everything in it, so it's convenient. - // zext((i | constant)&constant) is also valid and accepted here. - if (UInst && (UInst->getOpcode()==Instruction::Add || - UInst->getOpcode()==Instruction::Or) && - UInst->hasOneUse() && - isa<ConstantInt>(UInst->getOperand(1))) { - uint64_t newBitSize = LargestType->getPrimitiveSizeInBits(); - ConstantInt* AddRHS = dyn_cast<ConstantInt>(UInst->getOperand(1)); - Instruction *UInst2 = dyn_cast<Instruction>(UInst->use_begin()); - if (UInst2 && UInst2->getOpcode() == Instruction::And && - !UInst2->use_empty() && - allUsesAreSameTyped(Instruction::ZExt, UInst2) && - isa<ConstantInt>(UInst2->getOperand(1))) { - ZExtInst* oldZext = dyn_cast<ZExtInst>(UInst2->use_begin()); - uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits(); - Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, - L, oldZext->getType(), Rewriter); - ConstantInt* AndRHS = dyn_cast<ConstantInt>(UInst2->getOperand(1)); - APInt APnewAddRHS = APInt(AddRHS->getValue()).zext(newBitSize); - if (newBitSize > truncSize) - APnewAddRHS = APnewAddRHS.trunc(truncSize); - ConstantInt* newAddRHS = ConstantInt::get(APnewAddRHS); - Value *NewAdd = ((UInst->getOpcode()==Instruction::Add) ? - BinaryOperator::CreateAdd(TruncIndVar, newAddRHS, - UInst->getName()+".nozex", UInst2) : - BinaryOperator::CreateOr(TruncIndVar, newAddRHS, - UInst->getName()+".nozex", UInst2)); - APInt APcopy2 = APInt(AndRHS->getValue()); - ConstantInt* newAndRHS = ConstantInt::get(APcopy2.zext(newBitSize)); - Value *NewAnd = - BinaryOperator::CreateAnd(NewAdd, newAndRHS, - UInst->getName()+".nozex", UInst2); - for (Value::use_iterator UI2 = UInst2->use_begin(), - UE2 = UInst2->use_end(); UI2 != UE2; ++UI2) { - Instruction *II = dyn_cast<Instruction>(UI2); - II->replaceAllUsesWith(NewAnd); - DeadInsts.insert(II); - } - DeadInsts.insert(UInst); - DeadInsts.insert(UInst2); + + // Create the SCEV representing the offset from the canonical + // induction variable, still in the canonical induction variable's + // type, so that all expanded arithmetic is done in the same type. + SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType), + PromotedStride, L); + // Add the PromotedOffset as a separate step, because it may not be + // loop-invariant. + NewAR = SE->getAddExpr(NewAR, PromotedOffset); + + // Expand the addrec into instructions. + Value *V = Rewriter.expandCodeFor(NewAR, LargestType); + + // Insert an explicit cast if necessary to truncate the value + // down to the original stride type. This is done outside of + // SCEVExpander because in SCEV expressions, a truncate of an + // addrec is always folded. + if (LargestType != IVTy) { + if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) + NewAR = SE->getTruncateExpr(NewAR, IVTy); + if (Rewriter.isInsertedExpression(NewAR)) + V = Rewriter.expandCodeFor(NewAR, IVTy); + else { + V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false, + IVTy, false), + V, IVTy); + assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) && + "LargestType wasn't actually the largest type!"); + // Force the rewriter to use this trunc whenever this addrec + // appears so that it doesn't insert new phi nodes or + // arithmetic in a different type. + Rewriter.addInsertedValue(V, NewAR); } } + + DOUT << "INDVARS: Made offset-and-trunc IV for offset " + << *IVTy << " " << *Offset << ": "; + DEBUG(WriteAsOperand(*DOUT, V, false)); + DOUT << "\n"; + + // Now expand it into actual Instructions and patch it into place. + NewVal = Rewriter.expandCodeFor(AR, UseTy); } - // Replace the old PHI Node with the inserted computation. - PN->replaceAllUsesWith(NewVal); - DeadInsts.insert(PN); - IndVars.pop_back(); - ++NumRemoved; - Changed = true; + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op + << " into = " << *NewVal << "\n"; + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } } - DeleteTriviallyDeadInstructions(DeadInsts); - assert(L->isLCSSAForm()); - return Changed; + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) { + Instruction *Inst = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); + if (Inst) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } +} + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + Instruction *NonPHI = ExitBlock->getFirstNonPHI(); + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. Only + // consider those new instructions. + if (!Rewriter.isInsertedInstruction(I)) + break; + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + BasicBlock *UseBB = cast<Instruction>(UI)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(UI)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + if (I != Preheader->begin()) + --I; + else + Done = true; + ToMove->moveBefore(NonPHI); + if (Done) + break; + } +} + +/// Re-schedule the inserted instructions to put defs before uses. This +/// fixes problems that arrise when SCEV expressions contain loop-variant +/// values unrelated to the induction variable which are defined inside the +/// loop. FIXME: It would be better to insert instructions in the right +/// place so that this step isn't needed. +void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) { + // Visit all the blocks in the loop in pre-order dom-tree dfs order. + DominatorTree *DT = &getAnalysis<DominatorTree>(); + std::map<Instruction *, unsigned> NumPredsLeft; + SmallVector<DomTreeNode *, 16> Worklist; + Worklist.push_back(DT->getNode(L->getHeader())); + do { + DomTreeNode *Node = Worklist.pop_back_val(); + for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) + if (L->contains((*I)->getBlock())) + Worklist.push_back(*I); + BasicBlock *BB = Node->getBlock(); + // Visit all the instructions in the block top down. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Count the number of operands that aren't properly dominating. + unsigned NumPreds = 0; + if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I)) + for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (Instruction *Inst = dyn_cast<Instruction>(OI)) + if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst)) + ++NumPreds; + NumPredsLeft[I] = NumPreds; + // Notify uses of the position of this instruction, and move the + // users (and their dependents, recursively) into place after this + // instruction if it is their last outstanding operand. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Instruction *Inst = cast<Instruction>(UI); + std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst); + if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) { + SmallVector<Instruction *, 4> UseWorkList; + UseWorkList.push_back(Inst); + BasicBlock::iterator InsertPt = next(I); + while (isa<PHINode>(InsertPt)) ++InsertPt; + do { + Instruction *Use = UseWorkList.pop_back_val(); + Use->moveBefore(InsertPt); + NumPredsLeft.erase(Use); + for (Value::use_iterator IUI = Use->use_begin(), + IUE = Use->use_end(); IUI != IUE; ++IUI) { + Instruction *IUIInst = cast<Instruction>(IUI); + if (L->contains(IUIInst->getParent()) && + Rewriter.isInsertedInstruction(IUIInst) && + !isa<PHINode>(IUIInst)) + UseWorkList.push_back(IUIInst); + } + } while (!UseWorkList.empty()); + } + } + } + } while (!Worklist.empty()); } /// Return true if it is OK to use SIToFPInst for an inducation variable @@ -933,8 +728,7 @@ static bool convertToInt(const APFloat &APF, uint64_t *intVal) { /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH, - SmallPtrSet<Instruction*, 16> &DeadInsts) { +void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; @@ -1041,25 +835,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH, ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), EC->getParent()->getTerminator()); + // In the following deltions, PH may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PH; + // Delete old, floating point, exit comparision instruction. EC->replaceAllUsesWith(NewEC); - DeadInsts.insert(EC); + RecursivelyDeleteTriviallyDeadInstructions(EC); // Delete old, floating point, increment instruction. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - DeadInsts.insert(Incr); - - // Replace floating induction variable. Give SIToFPInst preference over - // UIToFPInst because it is faster on platforms that are widely used. - if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { - SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", - PH->getParent()->getFirstNonPHI()); - PH->replaceAllUsesWith(Conv); - } else { - UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", - PH->getParent()->getFirstNonPHI()); - PH->replaceAllUsesWith(Conv); + RecursivelyDeleteTriviallyDeadInstructions(Incr); + + // Replace floating induction variable, if it isn't already deleted. + // Give SIToFPInst preference over UIToFPInst because it is faster on + // platforms that are widely used. + if (WeakPH && !PH->use_empty()) { + if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { + SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", + PH->getParent()->getFirstNonPHI()); + PH->replaceAllUsesWith(Conv); + } else { + UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", + PH->getParent()->getFirstNonPHI()); + PH->replaceAllUsesWith(Conv); + } + RecursivelyDeleteTriviallyDeadInstructions(PH); } - DeadInsts.insert(PH); -} + // Add a new IVUsers entry for the newly-created integer PHI. + IU->AddUsersIfInteresting(NewPHI); +} diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 127ef56..4f6d531 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -20,6 +20,7 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -53,40 +54,6 @@ namespace { struct BasedUser; - /// IVStrideUse - Keep track of one use of a strided induction variable, where - /// the stride is stored externally. The Offset member keeps track of the - /// offset from the IV, User is the actual user of the operand, and - /// 'OperandValToReplace' is the operand of the User that is the use. - struct VISIBILITY_HIDDEN IVStrideUse { - SCEVHandle Offset; - Instruction *User; - Value *OperandValToReplace; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop or uses dominated by the loop. - bool isUseOfPostIncrementedValue; - - IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O) - : Offset(Offs), User(U), OperandValToReplace(O), - isUseOfPostIncrementedValue(false) {} - }; - - /// IVUsersOfOneStride - This structure keeps track of all instructions that - /// have an operand that is based on the trip count multiplied by some stride. - /// The stride for all of these users is common and kept external to this - /// structure. - struct VISIBILITY_HIDDEN IVUsersOfOneStride { - /// Users - Keep track of all of the users of this stride as well as the - /// initial value and the operand that uses the IV. - std::vector<IVStrideUse> Users; - - void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) { - Users.push_back(IVStrideUse(Offset, User, Operand)); - } - }; - /// IVInfo - This structure keeps track of one IV expression inserted during /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. @@ -110,15 +77,12 @@ namespace { }; class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { + IVUsers *IU; LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; bool Changed; - /// IVUsesByStride - Keep track of all uses of induction variables that we - /// are interested in. The key of the map is the stride of the access. - std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride; - /// IVsByStride - Keep track of all IVs that have been inserted for a /// particular stride. std::map<SCEVHandle, IVsOfOneStride> IVsByStride; @@ -127,14 +91,9 @@ namespace { /// reused (nor should they be rewritten to reuse other strides). SmallSet<SCEVHandle, 4> StrideNoReuse; - /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: - /// We use this to iterate over the IVUsesByStride collection without being - /// dependent on random ordering of pointers in the process. - SmallVector<SCEVHandle, 16> StrideOrder; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. - SmallVector<Instruction*, 16> DeadInsts; + SmallVector<WeakVH, 16> DeadInsts; /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. @@ -161,11 +120,11 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<IVUsers>(); + AU.addPreserved<IVUsers>(); } private: - bool AddUsersIfInteresting(Instruction *I, Loop *L, - SmallPtrSet<Instruction*,16> &Processed); ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, const SCEVHandle* &CondStride); @@ -191,6 +150,8 @@ namespace { const std::vector<BasedUser>& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector<BasedUser>& UsersToProcess); + bool ValidOffset(bool, int64_t, int64_t, + const std::vector<BasedUser>& UsersToProcess); SCEVHandle CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -242,21 +203,8 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; - // Sort the deadinsts list so that we can trivially eliminate duplicates as we - // go. The code below never adds a non-dead instruction to the worklist, but - // callers may not be so careful. - array_pod_sort(DeadInsts.begin(), DeadInsts.end()); - - // Drop duplicate instructions and those with uses. - for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) { - Instruction *I = DeadInsts[i]; - if (!I->use_empty()) DeadInsts[i] = 0; - while (i != e && DeadInsts[i+1] == I) - DeadInsts[++i] = 0; - } - while (!DeadInsts.empty()) { - Instruction *I = DeadInsts.back(); + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); DeadInsts.pop_back(); if (I == 0 || !isInstructionTriviallyDead(I)) @@ -313,111 +261,6 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { return false; } -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, - SCEVHandle &Start, SCEVHandle &Stride, - ScalarEvolution *SE, DominatorTree *DT) { - SCEVHandle TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { - if (AddRec->getLoop() == L) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - return false; // Nested IV of some sort? - } else { - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } - - } else if (isa<SCEVAddRecExpr>(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. - } - - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); - if (!AddRec || AddRec->getLoop() != L) return false; - - // FIXME: Generalize to non-affine IV's. - if (!AddRec->isAffine()) return false; - - // If Start contains an SCEVAddRecExpr from a different loop, other than an - // outer loop of the current loop, reject it. SCEV has no concept of - // operating on more than one loop at a time so don't confuse it with such - // expressions. - if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L)) - return false; - - Start = SE->getAddExpr(Start, AddRec->getOperand(0)); - - if (!isa<SCEVConstant>(AddRec->getOperand(1))) { - // If stride is an instruction, make sure it dominates the loop preheader. - // Otherwise we could end up with a use before def situation. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRec->getOperand(1)->dominates(Preheader, DT)) - return false; - - DOUT << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"; - } - - Stride = AddRec->getOperand(1); - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, DominatorTree *DT, Pass *P, - SmallVectorImpl<Instruction*> &DeadInsts){ - // If the user is in the loop, use the preinc value. - if (L->contains(User->getParent())) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that do not dominate the latch block, but (since their - // uses occur in the predecessor block, not the block the PHI lives in) should - // still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; -} - /// isAddressUse - Returns true if the specified instruction is using the /// specified value as an address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { @@ -467,90 +310,6 @@ static const Type *getAccessType(const Instruction *Inst) { return UseTy; } -/// AddUsersIfInteresting - Inspect the specified instruction. If it is a -/// reducible SCEV, recursively add its users to the IVUsesByStride set and -/// return true. Otherwise, return false. -bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, - SmallPtrSet<Instruction*,16> &Processed) { - if (!SE->isSCEVable(I->getType())) - return false; // Void and FP expressions cannot be reduced. - - // LSR is not APInt clean, do not touch integers bigger than 64-bits. - if (SE->getTypeSizeInBits(I->getType()) > 64) - return false; - - if (!Processed.insert(I)) - return true; // Instruction already handled. - - // Get the symbolic expression for this instruction. - SCEVHandle ISE = SE->getSCEV(I); - if (isa<SCEVCouldNotCompute>(ISE)) return false; - - // Get the start and stride for this expression. - SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); - SCEVHandle Stride = Start; - if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. - - std::vector<Instruction *> IUsers; - // Collect all I uses now because IVUseShouldUsePostIncValue may - // invalidate use_iterator. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) - IUsers.push_back(cast<Instruction>(*UI)); - - for (unsigned iused_index = 0, iused_size = IUsers.size(); - iused_index != iused_size; ++iused_index) { - - Instruction *User = IUsers[iused_index]; - - // Do not infinitely recurse on PHI nodes. - if (isa<PHINode>(User) && Processed.count(User)) - continue; - - // Descend recursively, but not into PHI nodes outside the current loop. - // It's important to see the entire expression outside the loop to get - // choices that depend on addressing mode use right, although we won't - // consider references ouside the loop in all cases. - // If User is already in Processed, we don't want to recurse into it again, - // but do want to record a second reference in the same instruction. - bool AddUserToIVUsers = false; - if (LI->getLoopFor(User->getParent()) != L) { - if (isa<PHINode>(User) || Processed.count(User) || - !AddUsersIfInteresting(User, L, Processed)) { - DOUT << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"; - AddUserToIVUsers = true; - } - } else if (Processed.count(User) || - !AddUsersIfInteresting(User, L, Processed)) { - DOUT << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"; - AddUserToIVUsers = true; - } - - if (AddUserToIVUsers) { - IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride]; - if (StrideUses.Users.empty()) // First occurrence of this stride? - StrideOrder.push_back(Stride); - - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. - SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses.addUser(NewStart, User, I); - StrideUses.Users.back().isUseOfPostIncrementedValue = true; - DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; - } else { - StrideUses.addUser(Start, User, I); - } - } - } - return true; -} - namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. @@ -571,6 +330,13 @@ namespace { /// EmittedBase. Value *OperandValToReplace; + /// isSigned - The stride (and thus also the Base) of this use may be in + /// a narrower type than the use itself (OperandValToReplace->getType()). + /// When this is the case, the isSigned field indicates whether the + /// IV expression should be signed-extended instead of zero-extended to + /// fit the type of the use. + bool isSigned; + /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -589,10 +355,11 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.Offset), Inst(IVSU.User), - OperandValToReplace(IVSU.OperandValToReplace), + : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + OperandValToReplace(IVSU.getOperandValToReplace()), + isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} + isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added @@ -600,7 +367,7 @@ namespace { void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<Instruction*> &DeadInsts); + SmallVectorImpl<WeakVH> &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, const Type *Ty, @@ -638,19 +405,27 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, InsertLoop = InsertLoop->getParentLoop(); } - Value *Base = Rewriter.expandCodeFor(NewBase, Ty, BaseInsertPt); + Value *Base = Rewriter.expandCodeFor(NewBase, NewBase->getType(), + BaseInsertPt); + + SCEVHandle NewValSCEV = SE->getUnknown(Base); // If there is no immediate value, skip the next part. - if (Imm->isZero()) - return Base; + if (!Imm->isZero()) { + // If we are inserting the base and imm values in the same block, make sure + // to adjust the IP position if insertion reused a result. + if (IP == BaseInsertPt) + IP = Rewriter.getInsertionPoint(); + + // Always emit the immediate (if non-zero) into the same block as the user. + NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); + } + + if (isSigned) + NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); + else + NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - // If we are inserting the base and imm values in the same block, make sure to - // adjust the IP position if insertion reused a result. - if (IP == BaseInsertPt) - IP = Rewriter.getInsertionPoint(); - - // Always emit the immediate (if non-zero) into the same block as the user. - SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm); return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -664,7 +439,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<Instruction*> &DeadInsts){ + SmallVectorImpl<WeakVH> &DeadInsts) { if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -1158,6 +933,39 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, return true; } +/// ValidOffset - Check whether the given Offset is valid for all loads and +/// stores in UsersToProcess. +/// +bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, + int64_t Offset, + int64_t Scale, + const std::vector<BasedUser>& UsersToProcess) { + if (!TLI) + return true; + + for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { + // If this is a load or other access, pass the type of the access in. + const Type *AccessTy = Type::VoidTy; + if (isAddressUse(UsersToProcess[i].Inst, + UsersToProcess[i].OperandValToReplace)) + AccessTy = getAccessType(UsersToProcess[i].Inst); + else if (isa<PHINode>(UsersToProcess[i].Inst)) + continue; + + TargetLowering::AddrMode AM; + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) + AM.BaseOffs = SC->getValue()->getSExtValue(); + AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset; + AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); + AM.Scale = Scale; + + // If load[imm+r*scale] is illegal, bail out. + if (!TLI->isLegalAddressingMode(AM, AccessTy)) + return false; + } + return true; +} + /// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not /// a nop. bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, @@ -1196,10 +1004,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map<SCEVHandle, IVsOfOneStride>::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || StrideNoReuse.count(SI->first)) continue; @@ -1215,24 +1023,44 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // multiplications. if (Scale == 1 || (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) + ValidScale(HasBaseReg, Scale, UsersToProcess))) { + // Prefer to reuse an IV with a base of zero. for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), IE = SI->second.IVs.end(); II != IE; ++II) - // FIXME: Only handle base == 0 for now. - // Only reuse previous IV if it would not require a type conversion. + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. if (II->Base->isZero() && !RequiresTypeConversion(II->Base->getType(), Ty)) { IV = *II; return SE->getIntegerSCEV(Scale, Stride->getType()); } + // Otherwise, settle for an IV with a foldable base. + if (AllUsesAreAddresses) + for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), + IE = SI->second.IVs.end(); II != IE; ++II) + // Only reuse previous IV if it would not require a type conversion + // and if the base difference can be folded. + if (SE->getEffectiveSCEVType(II->Base->getType()) == + SE->getEffectiveSCEVType(Ty) && + isa<SCEVConstant>(II->Base)) { + int64_t Base = + cast<SCEVConstant>(II->Base)->getValue()->getSExtValue(); + if (Base > INT32_MIN && Base <= INT32_MAX && + ValidOffset(HasBaseReg, -Base * Scale, + Scale, UsersToProcess)) { + IV = *II; + return SE->getIntegerSCEV(Scale, Stride->getType()); + } + } + } } } else if (AllUsesAreOutsideLoop) { // Accept nonconstant strides here; it is really really right to substitute // an existing IV if we can. - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map<SCEVHandle, IVsOfOneStride>::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); @@ -1249,10 +1077,10 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, } // Special case, old IV is -1*x and this one is x. Can treat this one as // -1*old. - for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e; - ++NewStride) { + for (unsigned NewStride = 0, e = IU->StrideOrder.size(); + NewStride != e; ++NewStride) { std::map<SCEVHandle, IVsOfOneStride>::iterator SI = - IVsByStride.find(StrideOrder[NewStride]); + IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end()) continue; if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) @@ -1303,10 +1131,15 @@ SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, bool &AllUsesAreAddresses, bool &AllUsesAreOutsideLoop, std::vector<BasedUser> &UsersToProcess) { + // FIXME: Generalize to non-affine IV's. + if (!Stride->isLoopInvariant(L)) + return SE->getIntegerSCEV(0, Stride->getType()); + UsersToProcess.reserve(Uses.Users.size()); - for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) { - UsersToProcess.push_back(BasedUser(Uses.Users[i], SE)); - + for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(), + E = Uses.Users.end(); I != E; ++I) { + UsersToProcess.push_back(BasedUser(*I, SE)); + // Move any loop variant operands from the offset field to the immediate // field of the use, so that we don't try to use something before it is // computed. @@ -1404,7 +1237,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( // TODO: For now, don't do full strength reduction if there could // potentially be greater-stride multiples of the current stride // which could reuse the current stride IV. - if (StrideOrder.back() != Stride) + if (IU->StrideOrder.back() != Stride) return false; // Iterate through the uses to find conditions that automatically rule out @@ -1853,8 +1686,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp); - if (SE->getTypeSizeInBits(RewriteOp->getType()) != - SE->getTypeSizeInBits(ReplacedTy)) { + if (SE->getEffectiveSCEVType(RewriteOp->getType()) != + SE->getEffectiveSCEVType(ReplacedTy)) { assert(SE->getTypeSizeInBits(RewriteOp->getType()) > SE->getTypeSizeInBits(ReplacedTy) && "Unexpected widening cast!"); @@ -1884,8 +1717,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // it here. if (!ReuseIV.Base->isZero()) { SCEVHandle typedBase = ReuseIV.Base; - if (SE->getTypeSizeInBits(RewriteExpr->getType()) != - SE->getTypeSizeInBits(ReuseIV.Base->getType())) { + if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != + SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { // It's possible the original IV is a larger type than the new IV, // in which case we have to truncate the Base. We checked in // RequiresTypeConversion that this is valid. @@ -1929,7 +1762,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. - DeadInsts.push_back(cast<Instruction>(User.OperandValToReplace)); + DeadInsts.push_back(User.OperandValToReplace); UsersToProcess.pop_back(); ++NumReduced; @@ -1949,19 +1782,19 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, /// false. bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride) { - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse; - ++Stride) { - std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - - for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; ++UI) - if (UI->User == Cond) { + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e && !CondUse; ++Stride) { + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + + for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { // NOTE: we could handle setcc instructions with multiple uses here, but // InstCombine does it as well for simple uses, it's not clear that it // occurs enough in real life to handle. - CondUse = &*UI; + CondUse = UI; CondStride = &SI->first; return true; } @@ -2022,9 +1855,18 @@ namespace { ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, const SCEVHandle* &CondStride) { - if (StrideOrder.size() < 2 || - IVUsesByStride[*CondStride].Users.size() != 1) + // If there's only one stride in the loop, there's nothing to do here. + if (IU->StrideOrder.size() < 2) + return Cond; + // If there are other users of the condition's stride, don't bother + // trying to change the condition because the stride will still + // remain. + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator I = + IU->IVUsesByStride.find(*CondStride); + if (I == IU->IVUsesByStride.end() || + I->second->Users.size() != 1) return Cond; + // Only handle constant strides for now. const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride); if (!SC) return Cond; @@ -2051,9 +1893,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) { - std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = - IVUsesByStride.find(StrideOrder[i]); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); if (!isa<SCEVConstant>(SI->first)) continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); @@ -2069,6 +1911,9 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Check for overflow. if (!Mul.isSignedIntN(BitWidth)) continue; + // Check for overflow in the stride's type too. + if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType()))) + continue; // Watch out for overflow. if (ICmpInst::isSignedPredicate(Predicate) && @@ -2079,9 +1924,27 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, continue; // Pick the best iv to use trying to avoid a cast. NewCmpLHS = NULL; - for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; ++UI) { - NewCmpLHS = UI->OperandValToReplace; + for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; ++UI) { + Value *Op = UI->getOperandValToReplace(); + + // If the IVStrideUse implies a cast, check for an actual cast which + // can be used to find the original IV expression. + if (SE->getEffectiveSCEVType(Op->getType()) != + SE->getEffectiveSCEVType(SI->first->getType())) { + CastInst *CI = dyn_cast<CastInst>(Op); + // If it's not a simple cast, it's complicated. + if (!CI) + continue; + // If it's a cast from a type other than the stride type, + // it's complicated. + if (CI->getOperand(0)->getType() != SI->first->getType()) + continue; + // Ok, we found the IV expression in the stride's type. + Op = CI->getOperand(0); + } + + NewCmpLHS = Op; if (NewCmpLHS->getType() == CmpTy) break; } @@ -2105,13 +1968,13 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Don't rewrite if use offset is non-constant and the new type is // of a different type. // FIXME: too conservative? - if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset)) + if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset())) continue; bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector<BasedUser> UsersToProcess; - SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L, + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2127,7 +1990,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (Scale < 0 && !Cond->isEquality()) Predicate = ICmpInst::getSwappedPredicate(Predicate); - NewStride = &StrideOrder[i]; + NewStride = &IU->StrideOrder[i]; if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { @@ -2135,10 +1998,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->Offset, + ? SE->getMulExpr(CondUse->getOffset(), SE->getConstant(ConstantInt::get(CmpTy, Scale))) : SE->getConstant(ConstantInt::get(NewCmpIntTy, - cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale)); + cast<SCEVConstant>(CondUse->getOffset())->getValue() + ->getSExtValue()*Scale)); break; } } @@ -2165,13 +2029,12 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond); // Remove the old compare instruction. The old indvar is probably dead too. - DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace)); + DeadInsts.push_back(CondUse->getOperandValToReplace()); OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IVUsesByStride[*CondStride].Users.pop_back(); - IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IVUsesByStride[*NewStride].Users.back(); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; Changed = true; @@ -2287,12 +2150,12 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); - Cond->eraseFromParent(); + CondUse->setUser(NewCond); Instruction *Cmp = cast<Instruction>(Sel->getOperand(0)); + Cond->eraseFromParent(); Sel->eraseFromParent(); if (Cmp->use_empty()) Cmp->eraseFromParent(); - CondUse->User = NewCond; return NewCond; } @@ -2304,19 +2167,19 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return; - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); if (!isa<SCEVConstant>(SI->first)) continue; - for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(), - E = SI->second.Users.end(); UI != E; /* empty */) { - std::vector<IVStrideUse>::iterator CandidateUI = UI; + for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), + E = SI->second->Users.end(); UI != E; /* empty */) { + ilist<IVStrideUse>::iterator CandidateUI = UI; ++UI; - Instruction *ShadowUse = CandidateUI->User; + Instruction *ShadowUse = CandidateUI->getUser(); const Type *DestTy = NULL; /* If shadow use is a int->float cast then insert a second IV @@ -2331,9 +2194,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User)) + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User)) + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) DestTy = SCast->getDestTy(); if (!DestTy) continue; @@ -2400,7 +2263,6 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* Remove cast operation */ ShadowUse->replaceAllUsesWith(NewPH); ShadowUse->eraseFromParent(); - SI->second.Users.erase(CandidateUI); NumShadow++; break; } @@ -2450,11 +2312,12 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // transform the icmp to use post-inc iv. Otherwise do so only if it would // not reuse another iv and its iv would be reused by other uses. We are // optimizing for the case where the icmp is the only use of the iv. - IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride]; - for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) { - if (StrideUses.Users[i].User == Cond) + IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride]; + for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (I->getUser() == Cond) continue; - if (!StrideUses.Users[i].isUseOfPostIncrementedValue) + if (!I->isUseOfPostIncrementedValue()) return; } @@ -2463,10 +2326,10 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // StrengthReduceStridedIVUsers? if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) { int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee; + for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; ++NewStride) { - std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = - IVUsesByStride.find(StrideOrder[NewStride]); + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride) continue; int64_t SSInt = @@ -2479,7 +2342,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector<BasedUser> UsersToProcess; - SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L, + SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2518,17 +2381,18 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { LatchBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond, - CondUse->OperandValToReplace); - CondUse = &IVUsesByStride[*CondStride].Users.back(); + IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, + CondUse->getOperandValToReplace(), + false); + CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride); - CondUse->isUseOfPostIncrementedValue = true; + CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride)); + CondUse->setIsUseOfPostIncrementedValue(true); Changed = true; ++NumLoopCond; @@ -2644,19 +2508,13 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; - // Find all uses of induction variables in this loop, and categorize - // them by stride. Start by finding all of the PHI nodes in the header for - // this loop. If they are induction variables, inspect their uses. - SmallPtrSet<Instruction*,16> Processed; // Don't reprocess instructions. - for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - AddUsersIfInteresting(I, L, Processed); - - if (!IVUsesByStride.empty()) { + if (!IU->IVUsesByStride.empty()) { #ifndef NDEBUG DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart() << "\" "; @@ -2664,7 +2522,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { #endif // Sort the StrideOrder so we process larger strides first. - std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare(SE)); + std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), + StrideCompare(SE)); // Optimize induction variables. Some indvar uses can be transformed to use // strides that will be needed for other purposes. A common example of this @@ -2695,11 +2554,15 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // Also, note that we iterate over IVUsesByStride indirectly by using // StrideOrder. This extra layer of indirection makes the ordering of // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - StrengthReduceStridedIVUsers(SI->first, SI->second, L); + for (unsigned Stride = 0, e = IU->StrideOrder.size(); + Stride != e; ++Stride) { + std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SI->first->isLoopInvariant(L)) + continue; + StrengthReduceStridedIVUsers(SI->first, *SI->second, L); } } @@ -2708,9 +2571,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { OptimizeLoopCountIV(L); // We're done analyzing this loop; release all the state we built up for it. - IVUsesByStride.clear(); IVsByStride.clear(); - StrideOrder.clear(); StrideNoReuse.clear(); // Clean up after ourselves |