diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopUnswitch.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnswitch.cpp | 291 |
1 files changed, 203 insertions, 88 deletions
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index a2d0e98..2c75f63 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <map> #include <set> using namespace llvm; @@ -56,14 +57,70 @@ STATISTIC(NumSwitches, "Number of switches unswitched"); STATISTIC(NumSelects , "Number of selects unswitched"); STATISTIC(NumTrivial , "Number of unswitches that are trivial"); STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); +STATISTIC(TotalInsts, "Total number of instructions analyzed"); -// The specific value of 50 here was chosen based only on intuition and a +// The specific value of 100 here was chosen based only on intuition and a // few specific examples. static cl::opt<unsigned> Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), - cl::init(50), cl::Hidden); + cl::init(100), cl::Hidden); namespace { + + class LUAnalysisCache { + + typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> > + UnswitchedValsMap; + + typedef UnswitchedValsMap::iterator UnswitchedValsIt; + + struct LoopProperties { + unsigned CanBeUnswitchedCount; + unsigned SizeEstimation; + UnswitchedValsMap UnswitchedVals; + }; + + // Here we use std::map instead of DenseMap, since we need to keep valid + // LoopProperties pointer for current loop for better performance. + typedef std::map<const Loop*, LoopProperties> LoopPropsMap; + typedef LoopPropsMap::iterator LoopPropsMapIt; + + LoopPropsMap LoopsProperties; + UnswitchedValsMap* CurLoopInstructions; + LoopProperties* CurrentLoopProperties; + + // Max size of code we can produce on remained iterations. + unsigned MaxSize; + + public: + + LUAnalysisCache() : + CurLoopInstructions(NULL), CurrentLoopProperties(NULL), + MaxSize(Threshold) + {} + + // Analyze loop. Check its size, calculate is it possible to unswitch + // it. Returns true if we can unswitch this loop. + bool countLoop(const Loop* L); + + // Clean all data related to given loop. + void forgetLoop(const Loop* L); + + // Mark case value as unswitched. + // Since SI instruction can be partly unswitched, in order to avoid + // extra unswitching in cloned loops keep track all unswitched values. + void setUnswitched(const SwitchInst* SI, const Value* V); + + // Check was this case value unswitched before or not. + bool isUnswitched(const SwitchInst* SI, const Value* V); + + // Clone all loop-unswitch related loop properties. + // Redistribute unswitching quotas. + // Note, that new loop data is stored inside the VMap. + void cloneData(const Loop* NewLoop, const Loop* OldLoop, + const ValueToValueMapTy& VMap); + }; + class LoopUnswitch : public LoopPass { LoopInfo *LI; // Loop information LPPassManager *LPM; @@ -71,9 +128,8 @@ namespace { // LoopProcessWorklist - Used to check if second loop needs processing // after RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector<Loop*> LoopProcessWorklist; - - // FIXME: Consider custom class for this. - std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals; + + LUAnalysisCache BranchesInfo; bool OptimizeForSize; bool redoLoop; @@ -119,15 +175,7 @@ namespace { private: virtual void releaseMemory() { - // We need to forget about all switches in the current loop. - // FIXME: Do it better than enumerating all blocks of code - // and see if it is a switch instruction. - for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); I != E; ++I) { - SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator()); - if (SI) - UnswitchedVals.erase(SI); - } + BranchesInfo.forgetLoop(currentLoop); } /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, @@ -139,12 +187,6 @@ namespace { LoopProcessWorklist.erase(I); } - /// For new loop switches we clone info about values that was - /// already unswitched and has redundant successors. - /// Note, that new loop data is stored inside the VMap. - void CloneUnswitchedVals(const ValueToValueMapTy& VMap, - const BasicBlock* SrcBB); - void initLoopData() { loopHeader = currentLoop->getHeader(); loopPreheader = currentLoop->getLoopPreheader(); @@ -176,6 +218,112 @@ namespace { }; } + +// Analyze loop. Check its size, calculate is it possible to unswitch +// it. Returns true if we can unswitch this loop. +bool LUAnalysisCache::countLoop(const Loop* L) { + + std::pair<LoopPropsMapIt, bool> InsertRes = + LoopsProperties.insert(std::make_pair(L, LoopProperties())); + + LoopProperties& Props = InsertRes.first->second; + + if (InsertRes.second) { + // New loop. + + // Limit the number of instructions to avoid causing significant code + // expansion, and the number of basic blocks, to avoid loops with + // large numbers of branches which cause loop unswitching to go crazy. + // This is a very ad-hoc heuristic. + + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities and code that can + // be shared by the resultant unswitched loops. + CodeMetrics Metrics; + for (Loop::block_iterator I = L->block_begin(), + E = L->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + + Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); + Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); + MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; + } + + if (!Props.CanBeUnswitchedCount) { + DEBUG(dbgs() << "NOT unswitching loop %" + << L->getHeader()->getName() << ", cost too high: " + << L->getBlocks().size() << "\n"); + + return false; + } + + // Be careful. This links are good only before new loop addition. + CurrentLoopProperties = &Props; + CurLoopInstructions = &Props.UnswitchedVals; + + return true; +} + +// Clean all data related to given loop. +void LUAnalysisCache::forgetLoop(const Loop* L) { + + LoopPropsMapIt LIt = LoopsProperties.find(L); + + if (LIt != LoopsProperties.end()) { + LoopProperties& Props = LIt->second; + MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation; + LoopsProperties.erase(LIt); + } + + CurrentLoopProperties = NULL; + CurLoopInstructions = NULL; +} + +// Mark case value as unswitched. +// Since SI instruction can be partly unswitched, in order to avoid +// extra unswitching in cloned loops keep track all unswitched values. +void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) { + (*CurLoopInstructions)[SI].insert(V); +} + +// Check was this case value unswitched before or not. +bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) { + return (*CurLoopInstructions)[SI].count(V); +} + +// Clone all loop-unswitch related loop properties. +// Redistribute unswitching quotas. +// Note, that new loop data is stored inside the VMap. +void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, + const ValueToValueMapTy& VMap) { + + LoopProperties& NewLoopProps = LoopsProperties[NewLoop]; + LoopProperties& OldLoopProps = *CurrentLoopProperties; + UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals; + + // Reallocate "can-be-unswitched quota" + + --OldLoopProps.CanBeUnswitchedCount; + unsigned Quota = OldLoopProps.CanBeUnswitchedCount; + NewLoopProps.CanBeUnswitchedCount = Quota / 2; + OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; + + NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; + + // Clone unswitched values info: + // for new loop switches we clone info about values that was + // already unswitched and has redundant successors. + for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) { + const SwitchInst* OldInst = I->first; + Value* NewI = VMap.lookup(OldInst); + const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI); + assert(NewInst && "All instructions that are in SrcBB must be in VMap."); + + NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; + } +} + char LoopUnswitch::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) @@ -193,6 +341,10 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { /// invariant in the loop, or has an invariant piece, return the invariant. /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + + // We started analyze new instruction, increment scanned instructions counter. + ++TotalInsts; + // We can never unswitch on vector conditions. if (Cond->getType()->isVectorTy()) return 0; @@ -246,7 +398,19 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { /// and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; - LLVMContext &Context = currentLoop->getHeader()->getContext(); + + initLoopData(); + + // If LoopSimplify was unable to form a preheader, don't do any unswitching. + if (!loopPreheader) + return false; + + LLVMContext &Context = loopHeader->getContext(); + + // Probably we reach the quota of branches for this loop. If so + // stop unswitching. + if (!BranchesInfo.countLoop(currentLoop)) + return false; // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this @@ -272,7 +436,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); unsigned NumCases = SI->getNumCases(); - if (LoopCond && NumCases > 1) { + if (LoopCond && NumCases) { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! // FIXME: scan for a case with a non-critical edge? @@ -281,9 +445,9 @@ bool LoopUnswitch::processCurrentLoop() { // Do not process same value again and again. // At this point we have some cases already unswitched and // some not yet unswitched. Let's find the first not yet unswitched one. - for (unsigned i = 1; i < NumCases; ++i) { + for (unsigned i = 0; i < NumCases; ++i) { Constant* UnswitchValCandidate = SI->getCaseValue(i); - if (!UnswitchedVals[SI].count(UnswitchValCandidate)) { + if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { UnswitchVal = UnswitchValCandidate; break; } @@ -315,23 +479,6 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// For new loop switches we clone info about values that was -/// already unswitched and has redundant successors. -/// Not that new loop data is stored inside the VMap. -void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap, - const BasicBlock* SrcBB) { - - const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator()); - if (SI && UnswitchedVals.count(SI)) { - // Don't clone a totally simplified switch. - if (isa<Constant>(SI->getCondition())) - return; - Value* I = VMap.lookup(SI); - assert(I && "All instructions that are in SrcBB must be in VMap."); - UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI]; - } -} - /// isTrivialLoopExitBlock - Check to see if all paths from BB exit the /// loop with no side effects (including infinite loops). /// @@ -342,7 +489,8 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, BasicBlock *&ExitBB, std::set<BasicBlock*> &Visited) { if (!Visited.insert(BB).second) { - // Already visited. Without more analysis, this could indicate an infinte loop. + // Already visited. Without more analysis, this could indicate an infinite + // loop. return false; } else if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the @@ -426,16 +574,16 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // this. // Note that we can't trivially unswitch on the default case or // on already unswitched cases. - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) { BasicBlock* LoopExitCandidate; if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, - SI->getSuccessor(i)))) { + SI->getCaseSuccessor(i)))) { // Okay, we found a trivial case, remember the value that is trivial. ConstantInt* CaseVal = SI->getCaseValue(i); // Check that it was not unswitched before, since already unswitched // trivial vals are looks trivial too. - if (UnswitchedVals[SI].count(CaseVal)) + if (BranchesInfo.isUnswitched(SI, CaseVal)) continue; LoopExitBB = LoopExitCandidate; if (Val) *Val = CaseVal; @@ -467,12 +615,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { - initLoopData(); - - // If LoopSimplify was unable to form a preheader, don't do any unswitching. - if (!loopPreheader) - return false; - Function *F = loopHeader->getParent(); Constant *CondVal = 0; @@ -490,34 +632,6 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) return false; - // FIXME: This is overly conservative because it does not take into - // consideration code simplification opportunities and code that can - // be shared by the resultant unswitched loops. - CodeMetrics Metrics; - for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) - Metrics.analyzeBasicBlock(*I); - - // Limit the number of instructions to avoid causing significant code - // expansion, and the number of basic blocks, to avoid loops with - // large numbers of branches which cause loop unswitching to go crazy. - // This is a very ad-hoc heuristic. - - unsigned NumUnswitched = - (NumSwitches + NumBranches) + 1 /*take in account current iteration*/; - - unsigned NumInsts = Metrics.NumInsts * NumUnswitched; - unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched; - - if (NumInsts > Threshold || NumBlocks * 5 > Threshold || - Metrics.containsIndirectBr || Metrics.isRecursive) { - DEBUG(dbgs() << "NOT unswitching loop %" - << currentLoop->getHeader()->getName() << ", cost too high: " - << currentLoop->getBlocks().size() << "\n"); - return false; - } - UnswitchNontrivialCondition(LoopCond, Val, currentLoop); return true; } @@ -683,11 +797,6 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); - // Inherit simplified switches info for NewBB - // We needn't pass NewBB since its instructions are already contained - // inside the VMap. - CloneUnswitchedVals(VMap, LoopBlocks[i]); - NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); @@ -700,6 +809,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // Now we create the new Loop object for the versioned loop. Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); + + // Recalculate unswitching quota, inherit simplified switches info for NewBB, + // Probably clone more loop-unswitch related loop properties. + BranchesInfo.cloneData(NewLoop, L, VMap); + Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop @@ -1004,17 +1118,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, if (SI == 0 || !isa<ConstantInt>(Val)) continue; unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); - if (DeadCase == 0) continue; // Default case is live for multiple values. + // Default case is live for multiple values. + if (DeadCase == SwitchInst::ErrorIndex) continue; // Found a dead case value. Don't remove PHI nodes in the // successor if they become single-entry, those PHI nodes may // be in the Users list. BasicBlock *Switch = SI->getParent(); - BasicBlock *SISucc = SI->getSuccessor(DeadCase); + BasicBlock *SISucc = SI->getCaseSuccessor(DeadCase); BasicBlock *Latch = L->getLoopLatch(); - UnswitchedVals[SI].insert(Val); + BranchesInfo.setUnswitched(SI, Val); if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. // If the DeadCase successor dominates the loop latch, then the @@ -1031,7 +1146,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Compute the successors instead of relying on the return value // of SplitEdge, since it may have split the switch successor // after PHI nodes. - BasicBlock *NewSISucc = SI->getSuccessor(DeadCase); + BasicBlock *NewSISucc = SI->getCaseSuccessor(DeadCase); BasicBlock *OldSISucc = *succ_begin(NewSISucc); // Create an "unreachable" destination. BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", |