diff options
Diffstat (limited to 'lib/Transforms/Utils')
23 files changed, 2045 insertions, 1034 deletions
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index df87208..9bf9ef3 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -29,6 +29,7 @@ transforms_utils_SRC_FILES := \ SimplifyIndVar.cpp \ SimplifyInstructions.cpp \ SimplifyLibCalls.cpp \ + SpecialCaseList.cpp \ UnifyFunctionExitNodes.cpp \ Utils.cpp \ ValueMapper.cpp diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index ba99d2e..e17a416 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -170,7 +171,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { if (DomTreeNode *DTN = DT->getNode(BB)) { DomTreeNode *PredDTN = DT->getNode(PredBB); SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); - for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(), + for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), DE = Children.end(); DI != DE; ++DI) DT->changeImmediateDominator(*DI, PredDTN); @@ -235,22 +236,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); } -/// GetSuccessorNumber - Search for the specified successor of basic block BB -/// and return its position in the terminator instruction's list of -/// successors. It is an error to call this with a block that is not a -/// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { - TerminatorInst *Term = BB->getTerminator(); -#ifndef NDEBUG - unsigned e = Term->getNumSuccessors(); -#endif - for (unsigned i = 0; ; ++i) { - assert(i != e && "Didn't find edge?"); - if (Term->getSuccessor(i) == Succ) - return i; - } -} - /// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { @@ -598,52 +583,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, } } -/// FindFunctionBackedges - Analyze the specified function to find all of the -/// loop backedges in the function and return them. This is a relatively cheap -/// (compared to computing dominators and loop info) analysis. -/// -/// The output is added to Result, as pairs of <from,to> edge info. -void llvm::FindFunctionBackedges(const Function &F, - SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) { - const BasicBlock *BB = &F.getEntryBlock(); - if (succ_begin(BB) == succ_end(BB)) - return; - - SmallPtrSet<const BasicBlock*, 8> Visited; - SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; - SmallPtrSet<const BasicBlock*, 8> InStack; - - Visited.insert(BB); - VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); - InStack.insert(BB); - do { - std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); - const BasicBlock *ParentBB = Top.first; - succ_const_iterator &I = Top.second; - - bool FoundNew = false; - while (I != succ_end(ParentBB)) { - BB = *I++; - if (Visited.insert(BB)) { - FoundNew = true; - break; - } - // Successor is in VisitStack, it's a back edge. - if (InStack.count(BB)) - Result.push_back(std::make_pair(ParentBB, BB)); - } - - if (FoundNew) { - // Go down one level if there is a unvisited successor. - InStack.insert(BB); - VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); - } else { - // Go up one level. - InStack.erase(VisitStack.pop_back_val().first); - } - } while (!VisitStack.empty()); -} - /// FoldReturnIntoUncondBranch - This method duplicates the specified return /// instruction into a predecessor which ends in an unconditional branch. If /// the return instruction returns a value defined by a PHI, propagate the @@ -726,3 +665,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); return CheckTerm; } + +/// GetIfCondition - Given a basic block (BB) with two predecessors, +/// check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = NULL; + BasicBlock *Pred2 = NULL; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return NULL; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return NULL; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return NULL; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return NULL; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) + return 0; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8513772..8f3ff96 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileInfo.h" @@ -84,39 +85,6 @@ bool BreakCriticalEdges::runOnFunction(Function &F) { // Implementation of the external critical edge manipulation functions //===----------------------------------------------------------------------===// -// isCriticalEdge - Return true if the specified edge is a critical edge. -// Critical edges are edges from a block with multiple successors to a block -// with multiple predecessors. -// -bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, - bool AllowIdenticalEdges) { - assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); - if (TI->getNumSuccessors() == 1) return false; - - const BasicBlock *Dest = TI->getSuccessor(SuccNum); - const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); - - // If there is more than one predecessor, this is a critical edge... - assert(I != E && "No preds, but we have an edge to the block?"); - const BasicBlock *FirstPred = *I; - ++I; // Skip one edge due to the incoming arc from TI. - if (!AllowIdenticalEdges) - return I != E; - - // If AllowIdenticalEdges is true, then we allow this edge to be considered - // non-critical iff all preds come from TI's block. - while (I != E) { - const BasicBlock *P = *I; - if (P != FirstPred) - return true; - // Note: leave this as is until no one ever compiles with either gcc 4.0.1 - // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(P); - ++I; - } - return false; -} - /// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form /// may require new PHIs in the new exit block. This function inserts the /// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index b71628b..3648fd6 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -25,9 +25,11 @@ add_llvm_library(LLVMTransformUtils PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp + FlattenCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp + SpecialCaseList.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index be8d39e..d105f5e 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - ValueMapTypeRemapper *TypeMapper) { + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG @@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) RemapInstruction(II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper); + TypeMapper, Materializer); } /// CloneFunction - Return a copy of the specified function, but without diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f7c659f..82013f9 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -277,8 +277,8 @@ void CodeExtractor::splitReturnBlocks() { DomTreeNode *NewNode = DT->addNewBlock(New, *I); - for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) + for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); } } diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index db525cd..0723b35 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 0000000..9cbe15d --- /dev/null +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,488 @@ +//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reduce conditional branches in CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "flattencfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +namespace { +class FlattenCFGOpt { + AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for + /// conditional branches. + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief If \param BB is the merge block of an if-region, attempt to merge + /// the if-region with an adjacent if-region upstream if two if-regions + /// contain identical instructions. + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which + /// are from two if-regions whose entry blocks are \p Head1 and \p + /// Head2. \returns true if \p Block1 and \p Block2 contain identical + /// instructions, and have no memory reference alias with \p Head2. + /// This is used as a legality check for merging if-regions. + bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, BasicBlock *Block2); + +public: + FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); +}; +} + +/// If \param [in] BB has more than one predecessor that is a conditional +/// branch, attempt to use parallel and/or for the branch condition. \returns +/// true on success. +/// +/// Before: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// br i1 %cmp1, label %if.then, label %lor.rhs +/// +/// lor.rhs: +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// br i1 %cmp11, label %if.then, label %ifend +/// +/// if.end: // the merge block +/// ...... +/// +/// if.then: // has two predecessors, both of them contains conditional branch. +/// ...... +/// br label %if.end; +/// +/// After: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. +/// br i1 %cmp12, label %if.then, label %ifend +/// +/// if.end: +/// ...... +/// +/// if.then: +/// ...... +/// br label %if.end; +/// +/// Current implementation handles two cases. +/// Case 1: \param BB is on the else-path. +/// +/// BB1 +/// / | +/// BB2 | +/// / \ | +/// BB3 \ | where, BB1, BB2 contain conditional branches. +/// \ | / BB3 contains unconditional branch. +/// \ | / BB4 corresponds to \param BB which is also the merge. +/// BB => BB4 +/// +/// +/// Corresponding source code: +/// +/// if (a == b && c == d) +/// statement; // BB3 +/// +/// Case 2: \param BB BB is on the then-path. +/// +/// BB1 +/// / | +/// | BB2 +/// \ / | where BB1, BB2 contain conditional branches. +/// BB => BB3 | BB3 contains unconditiona branch and corresponds +/// \ / to \param BB. BB4 is the merge. +/// BB4 +/// +/// Corresponding source code: +/// +/// if (a == b || c == d) +/// statement; // BB3 +/// +/// In both cases, \param BB is the common successor of conditional branches. +/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as +/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches +/// as its predecessors. +/// +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + PHINode *PHI = dyn_cast<PHINode>(BB->begin()); + if (PHI) + return false; // For simplicity, avoid cases containing PHI nodes. + + BasicBlock *LastCondBlock = NULL; + BasicBlock *FirstCondBlock = NULL; + BasicBlock *UnCondBlock = NULL; + int Idx = -1; + + // Check predecessors of \param BB. + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + BasicBlock *Pred = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); + + // All predecessors should terminate with a branch. + if (!PBI) + return false; + + BasicBlock *PP = Pred->getSinglePredecessor(); + + if (PBI->isUnconditional()) { + // Case 1: Pred (BB3) is an unconditional block, it should + // have a single predecessor (BB2) that is also a predecessor + // of \param BB (BB4) and should not have address-taken. + // There should exist only one such unconditional + // branch among the predecessors. + if (UnCondBlock || !PP || (Preds.count(PP) == 0) || + Pred->hasAddressTaken()) + return false; + + UnCondBlock = Pred; + continue; + } + + // Only conditional branches are allowed beyond this point. + assert(PBI->isConditional()); + + // Condition's unique use should be the branch instruction. + Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { + Instruction *CI = BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. + BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (Preds.count(PS) == 0) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + TerminatorInst *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + while (1) { + BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + CmpInst::Predicate Predicate = CI->getPredicate(); + // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + if (CurrBlock == FirstCondBlock) + break; + CurrBlock = CurrBlock->getSinglePredecessor(); + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. + FirstCondBlock->getInstList().pop_back(); + FirstCondBlock->getInstList() + .splice(FirstCondBlock->end(), CB->getInstList()); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head1 is the entry of the +/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param +/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block +// in the 2nd if-region to compare. \returns true if \param Block1 and \param +/// Block2 have identical instructions and do not have memory reference alias +/// with \param Head2. +/// +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, + BasicBlock *Block2) { + TerminatorInst *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = Head2->begin(); + + bool eq1 = (Block1 == Head1); + bool eq2 = (Block2 == Head2); + if (eq1 || eq2) { + // An empty then-path or else-path. + return (eq1 == eq2); + } + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator(); + + while (1) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || AA->alias(iter1, BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instuctions and can be legally merged. \returns true if +/// the two if-regions are merged. +/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + BasicBlock *IfTrue2, *IfFalse2; + Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); + Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + + // Either then-path or else-path should be empty. + if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) + return false; + if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) + return false; + + TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = SecondEntryBlock->begin(); + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, + IfTrue2)) + return false; + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, + IfFalse2)) + return false; + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + Instruction *CI = BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->getInstList().pop_back(); + FirstEntryBlock->getInstList() + .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); + Value *CC = PBI->getCondition(); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + Value *NC = Builder.CreateOr(CInst1, CC); + PBI->replaceUsesOfWith(CC, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder)) + return true; + + if (MergeIfRegion(BB, Builder)) + return true; + + return Changed; +} + +/// FlattenCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. +/// +bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d2598a..dabb67b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -82,7 +82,8 @@ namespace { /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. - void forwardResume(ResumeInst *RI); + void forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// block. When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI) { +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. If the code doesn't have calls or unwinds, we know there is - // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls) { - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); - return; - } - + // rewrite. InvokeInliningInfo Invoke(II); - + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { @@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, continue; } + // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI); + Invoke.forwardResume(RI, InlinedLPads); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the + // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. InvokeDest->removePredecessor(II->getParent()); } @@ -748,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. @@ -777,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(NewBr, + AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call @@ -840,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; - BranchInst::Create(AfterCallBB, RI); + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); RI->eraseFromParent(); } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. @@ -864,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a54ee08..08e1808 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -84,7 +84,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BI->eraseFromParent(); return true; } - + if (Dest2 == Dest1) { // Conditional branch to same location? // This branch matches something like this: // br bool %cond, label %Dest, label %Dest @@ -104,7 +104,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, } return false; } - + if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { // If we are switching on a constant, we can convert the switch into a // single branch instruction! @@ -188,7 +188,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } - + if (SI->getNumCases() == 1) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. @@ -231,7 +231,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BasicBlock *TheOnlyDest = BA->getBasicBlock(); // Insert the new branch. Builder.CreateBr(TheOnlyDest); - + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) TheOnlyDest = 0; @@ -242,7 +242,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, IBI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); - + // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. @@ -250,11 +250,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BB->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); } - + return true; } } - + return false; } @@ -321,10 +321,10 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, Instruction *I = dyn_cast<Instruction>(V); if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; - + SmallVector<Instruction*, 16> DeadInsts; DeadInsts.push_back(I); - + do { I = DeadInsts.pop_back_val(); @@ -333,9 +333,9 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *OpV = I->getOperand(i); I->setOperand(i, 0); - + if (!OpV->use_empty()) continue; - + // If the operand is an instruction that became dead as we nulled out the // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. @@ -343,7 +343,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } - + I->eraseFromParent(); } while (!DeadInsts.empty()); @@ -450,12 +450,12 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; - + // Remove the entries for Pred from the PHI nodes in BB, but do not simplify // them down. This will leave us with single entry phi nodes and other phis // that can be removed. BB->removePredecessor(Pred, true); - + WeakVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); @@ -486,10 +486,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PN->replaceAllUsesWith(NewVal); PN->eraseFromParent(); } - + BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); - + // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -500,10 +500,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { BA->getType())); BA->destroyConstant(); } - + // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); - + // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); @@ -525,6 +525,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PredBB->eraseFromParent(); } +/// CanMergeValues - Return true if we can choose one of these values to use +/// in place of the other. Note that we will always choose the non-undef +/// value to keep. +static bool CanMergeValues(Value *First, Value *Second) { + return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second); +} + /// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an /// almost-empty BB ending in an unconditional branch to Succ, into succ. /// @@ -533,7 +540,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe @@ -555,9 +562,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { BasicBlock *IBB = PN->getIncomingBlock(PI); if (BBPreds.count(IBB) && - BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with " + !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), + PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " << IBB->getName() << "\n"); return false; @@ -570,8 +578,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. BasicBlock *IBB = PN->getIncomingBlock(PI); - if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + if (BBPreds.count(IBB) && + !CanMergeValues(Val, PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " << "predecessor " << IBB->getName() << "\n"); return false; @@ -583,6 +592,139 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { return true; } +typedef SmallVector<BasicBlock *, 16> PredBlockVector; +typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; + +/// \brief Determines the value to use as the phi node input for a block. +/// +/// Select between \p OldVal any value that we know flows from \p BB +/// to a particular phi on the basis of which one (if either) is not +/// undef. Update IncomingValues based on the selected value. +/// +/// \param OldVal The value we are considering selecting. +/// \param BB The block that the value flows in from. +/// \param IncomingValues A map from block-to-value for other phi inputs +/// that we have examined. +/// +/// \returns the selected value. +static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, + IncomingValueMap &IncomingValues) { + if (!isa<UndefValue>(OldVal)) { + assert((!IncomingValues.count(BB) || + IncomingValues.find(BB)->second == OldVal) && + "Expected OldVal to match incoming value from BB!"); + + IncomingValues.insert(std::make_pair(BB, OldVal)); + return OldVal; + } + + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It != IncomingValues.end()) return It->second; + + return OldVal; +} + +/// \brief Create a map from block to value for the operands of a +/// given phi. +/// +/// Create a map from block to value for each non-undef value flowing +/// into \p PN. +/// +/// \param PN The phi we are collecting the map for. +/// \param IncomingValues [out] The map from block to value for this phi. +static void gatherIncomingValuesToPhi(PHINode *PN, + IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) + IncomingValues.insert(std::make_pair(BB, V)); + } +} + +/// \brief Replace the incoming undef values to a phi with the values +/// from a block-to-value map. +/// +/// \param PN The phi we are replacing the undefs in. +/// \param IncomingValues A map from block to value. +static void replaceUndefValuesInPhi(PHINode *PN, + const IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) continue; + + BasicBlock *BB = PN->getIncomingBlock(i); + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It == IncomingValues.end()) continue; + + PN->setIncomingValue(i, It->second); + } +} + +/// \brief Replace a value flowing from a block to a phi with +/// potentially multiple instances of that value flowing from the +/// block's predecessors to the phi. +/// +/// \param BB The block with the value flowing into the phi. +/// \param BBPreds The predecessors of BB. +/// \param PN The phi that we are updating. +static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, + const PredBlockVector &BBPreds, + PHINode *PN) { + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + IncomingValueMap IncomingValues; + + // We are merging two blocks - BB, and the block containing PN - and + // as a result we need to redirect edges from the predecessors of BB + // to go to the block containing PN, and update PN + // accordingly. Since we allow merging blocks in the case where the + // predecessor and successor blocks both share some predecessors, + // and where some of those common predecessors might have undef + // values flowing into PN, we want to rewrite those values to be + // consistent with the non-undef values. + + gatherIncomingValuesToPhi(PN, IncomingValues); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + Value *PredVal = OldValPN->getIncomingValue(i); + Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } else { + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { + // Update existing incoming values in PN for this + // predecessor of BB. + BasicBlock *PredBB = BBPreds[i]; + Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } + + replaceUndefValuesInPhi(PN, IncomingValues); +} + /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, /// potential side-effect free intrinsics and the branch. If possible, @@ -595,7 +737,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // We can't eliminate infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; - + // Check to see if merging these blocks would cause conflicts for any of the // phi nodes in BB or Succ. If not, we can safely merge. if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; @@ -629,39 +771,21 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); - + if (isa<PHINode>(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in // the successor, then we need to add incoming edges for the PHI nodes // - const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + // Loop over all of the PHI nodes in the successor of BB. for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - Value *OldVal = PN->removeIncomingValue(BB, false); - assert(OldVal && "No entry in PHI for Pred BB!"); - - // If this incoming value is one of the PHI nodes in BB, the new entries - // in the PHI node are the entries from the old PHI. - if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { - PHINode *OldValPN = cast<PHINode>(OldVal); - for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) - // Note that, since we are merging phi nodes and BB and Succ might - // have common predecessors, we could end up with a phi node with - // identical incoming branches. This will be cleaned up later (and - // will trigger asserts if we try to clean it up now, without also - // simplifying the corresponding conditional branch). - PN->addIncoming(OldValPN->getIncomingValue(i), - OldValPN->getIncomingBlock(i)); - } else { - // Add an incoming value for each of the new incoming values. - for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) - PN->addIncoming(OldVal, BBPreds[i]); - } + + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); } } - + if (Succ->getSinglePredecessor()) { // BB is the only predecessor of Succ, so Succ will end up with exactly // the same predecessors BB had. @@ -676,7 +800,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { PN->eraseFromParent(); } } - + // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); if (!Succ->hasName()) Succ->takeName(BB); @@ -784,7 +908,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // the final program then it is impossible for us to reliably enforce the // preferred alignment. if (GV->isWeakForLinker()) return Align; - + if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); // We can only increase the alignment of the global if it has no alignment @@ -804,26 +928,27 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const DataLayout *TD) { + const DataLayout *DL) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); unsigned TrailZ = KnownZero.countTrailingOnes(); - - // Avoid trouble with rediculously large TrailZ values, such as + + // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - + // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); - + if (PrefAlign > Align) - Align = enforceKnownAlignment(V, Align, PrefAlign, TD); - + Align = enforceKnownAlignment(V, Align, PrefAlign, DL); + // We don't need to make any adjustment. return Align; } @@ -832,14 +957,36 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// Dbg Intrinsic utilities /// -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; + if (LdStHasDebugValue(DIVar, SI)) + return true; + Instruction *DbgVal = NULL; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -863,18 +1010,23 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, return true; } -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; - Instruction *DbgVal = + if (LdStHasDebugValue(DIVar, LI)) + return true; + + Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); - + // Propagate any debug metadata from the store onto the dbg.value. DebugLoc LIDL = LI->getDebugLoc(); if (!LIDL.isUnknown()) @@ -898,10 +1050,12 @@ bool llvm::LowerDbgDeclare(Function &F) { if (Dbgs.empty()) return false; - for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(), + for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + // We only remove the dbg.declare intrinsic if all uses are + // converted to dbg.value intrinsics. bool RemoveDDI = true; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) @@ -936,7 +1090,9 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, if (!DDI) return false; DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; // Create a copy of the original DIDescriptor for user variable, appending @@ -985,22 +1141,17 @@ bool llvm::removeUnreachableBlocks(Function &F) { if (Reachable.count(I)) continue; - // Remove the block as predecessor of all its reachable successors. - // Unreachable successors don't matter as they'll soon be removed, too. for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(I); + I->dropAllReferences(); + } - // Zap all instructions in this basic block. - while (!I->empty()) { - Instruction &Inst = I->back(); - if (!Inst.use_empty()) - Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); - I->getInstList().pop_back(); - } + for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; - --I; - llvm::next(I)->eraseFromParent(); - } return true; } diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 37819cc..6d5f16c 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -59,6 +59,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); @@ -100,16 +101,16 @@ namespace { private: bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - BasicBlock *InsertPreheaderForLoop(Loop *L); Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); - void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L); }; } +static void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L); + char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", "Canonicalize natural loops", true, false) @@ -208,7 +209,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L); + Preheader = InsertPreheaderForLoop(L, this); if (Preheader) { ++NumInserted; Changed = true; @@ -367,7 +368,7 @@ ReprocessLoop: /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { BasicBlock *Header = L->getHeader(); // Compute the set of predecessors of the loop that are not in the loop. @@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *PreheaderBB; if (!Header->isLandingPad()) { PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - this); + PP); } else { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", - ".split-lp", this, NewBBs); + ".split-lp", PP, NewBBs); PreheaderBB = NewBBs[0]; } @@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, // PlaceSplitBlockCarefully - If the block isn't already, move the new block to // right after some 'outside block' block. This prevents the preheader from // being placed inside the loop body, e.g. when the loop hasn't been rotated. -void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L) { +void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L) { // Check to see if NewBB is already well placed. Function::iterator BBI = NewBB; --BBI; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 9ec84d7..f66b54d 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -61,6 +61,8 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", namespace { class LowerInvoke : public FunctionPass { + const TargetMachine *TM; + // Used for both models. Constant *AbortFn; @@ -70,15 +72,12 @@ namespace { Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; bool useExpensiveEHSupport; - // We peek in TLI to grab the target's jmp_buf size and alignment - const TargetLowering *TLI; - public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetLowering *tli = NULL, + explicit LowerInvoke(const TargetMachine *TM = 0, bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), - TLI(tli) { + : FunctionPass(ID), TM(TM), + useExpensiveEHSupport(useExpensiveEHSupport) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } bool doInitialization(Module &M); @@ -108,12 +107,9 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { - return new LowerInvoke(TLI, ExpensiveEHSupport); -} -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, +FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM, bool useExpensiveEHSupport) { - return new LowerInvoke(TLI, useExpensiveEHSupport); + return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport); } // doInitialization - Make sure that there is a prototype for abort in the @@ -122,6 +118,7 @@ bool LowerInvoke::doInitialization(Module &M) { Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); @@ -430,6 +427,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an // alloca because the value needs to be live across invokes. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; AllocaInst *JmpBuf = new AllocaInst(JBLinkTy, 0, Align, diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 61b3965..ebd7db6 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -27,6 +28,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass(ID) { initializePromotePassPass(*PassRegistry::getPassRegistry()); } @@ -62,6 +64,7 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; DominatorTree &DT = getAnalysis<DominatorTree>(); + const DataLayout *DL = getAnalysisIfAvailable<DataLayout>(); while (1) { Allocas.clear(); @@ -70,12 +73,12 @@ bool PromotePass::runOnFunction(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (isAllocaPromotable(AI, DL)) Allocas.push_back(AI); if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT); + PromoteMemToReg(Allocas, DT, DL); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 3716f58..c370453 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -53,7 +53,7 @@ namespace { } bool runOnModule(Module &M) { - static const char *metaNames[] = { + static const char *const metaNames[] = { // See http://en.wikipedia.org/wiki/Metasyntactic_variable "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index d090b48..ff6e6f9 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -62,3 +63,20 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) { appendToGlobalArray("llvm.global_dtors", M, F, Priority); } + +GlobalVariable * +llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set, + bool CompilerUsed) { + const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used"; + GlobalVariable *GV = M.getGlobalVariable(Name); + if (!GV || !GV->hasInitializer()) + return GV; + + const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) { + Value *Op = Init->getOperand(I); + GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases()); + Set.insert(G); + } + return GV; +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index de335ec..6910180 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -27,9 +27,10 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -45,6 +46,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -56,360 +58,560 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -namespace llvm { -template<> -struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { - typedef std::pair<BasicBlock*, unsigned> EltTy; - static inline EltTy getEmptyKey() { - return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); +namespace { + +struct AllocaInfo : private InstVisitor<AllocaInfo, bool> { + const DataLayout *DL; + + SmallVector<BasicBlock *, 32> DefiningBlocks; + SmallVector<BasicBlock *, 32> UsingBlocks; + SmallVector<Instruction *, 8> DeadInsts; + + Type *AllocaTy; + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; + + AllocaInfo(const DataLayout *DL) : DL(DL) {} + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + DeadInsts.clear(); + AllocaTy = 0; + OnlyStore = 0; + OnlyBlock = 0; + OnlyUsedInOneBlock = true; + AllocaPointerVal = 0; + DbgDeclare = 0; } - static inline EltTy getTombstoneKey() { - return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + bool analyzeAlloca(AllocaInst &AI) { + clear(); + + AllocaTy = AI.getAllocatedType(); + enqueueUsers(AI); + + // Walk queued up uses in the worklist to handle nested uses. + while (!UseWorklist.empty()) { + U = UseWorklist.pop_back_val(); + Instruction &I = *cast<Instruction>(U->getUser()); + if (!visit(I)) + return false; // Propagate failure to promote up. + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = I.getParent(); + else if (OnlyBlock != I.getParent()) + OnlyUsedInOneBlock = false; + } + } + + DbgDeclare = FindAllocaDbgDeclare(&AI); + return true; } - static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { - using llvm::hash_value; - return static_cast<unsigned>(hash_value(Val)); + +private: + // Befriend the base class so it can call through private visitor methods. + friend class InstVisitor<AllocaInfo, bool>; + + /// \brief A use pointer that is non-null when visiting uses. + Use *U; + + /// \brief A worklist for recursively visiting all uses of an alloca. + SmallVector<Use *, 8> UseWorklist; + + /// \brief A set for preventing cyclic visitation. + SmallPtrSet<Use *, 8> VisitedUses; + + void enqueueUsers(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; + ++UI) + if (VisitedUses.insert(&UI.getUse())) + UseWorklist.push_back(&UI.getUse()); } - static bool isEqual(const EltTy &LHS, const EltTy &RHS) { - return LHS == RHS; + + bool visitLoadInst(LoadInst &LI) { + if (LI.isVolatile() || LI.getType() != AllocaTy) + return false; + + // Keep track of variable reads. + UsingBlocks.push_back(LI.getParent()); + AllocaPointerVal = &LI; + return true; } -}; -} -/// isAllocaPromotable - Return true if this alloca is legal for promotion. -/// This is true if there are only loads and stores to the alloca. -/// -bool llvm::isAllocaPromotable(const AllocaInst *AI) { - // FIXME: If the memory unit is of pointer or integer type, we can permit - // assignments to subsections of the memory unit. - - // Only allow direct and non-volatile loads and stores... - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { // Loop over all of the uses of the alloca - const User *U = *UI; - if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Note that atomic loads can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (LI->isVolatile()) - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. - // Note that atomic stores can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (SI->isVolatile()) - return false; - } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - return false; - } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) - return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) - return false; - if (!GEPI->hasAllZeroIndices()) - return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) - return false; - } else { + bool visitStoreInst(StoreInst &SI) { + if (SI.isVolatile() || SI.getValueOperand() == U->get() || + SI.getValueOperand()->getType() != AllocaTy) return false; + + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI.getParent()); + AllocaPointerVal = SI.getOperand(0); + OnlyStore = &SI; + return true; + } + + bool visitBitCastInst(BitCastInst &BC) { + if (BC.use_empty()) + DeadInsts.push_back(&BC); + else + enqueueUsers(BC); + return true; + } + + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { + if (GEPI.use_empty()) { + DeadInsts.push_back(&GEPI); + return true; } + + enqueueUsers(GEPI); + + return GEPI.hasAllZeroIndices(); } - return true; -} + // We can promote through debug info intrinsics as they don't alter the + // value stored in memory. + bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { + DeadInsts.push_back(&I); + return true; + } -namespace { - struct AllocaInfo; - - // Data package used by RenamePass() - class RenamePassData { - public: - typedef std::vector<Value *> ValVector; - - RenamePassData() : BB(NULL), Pred(NULL), Values() {} - RenamePassData(BasicBlock *B, BasicBlock *P, - const ValVector &V) : BB(B), Pred(P), Values(V) {} - BasicBlock *BB; - BasicBlock *Pred; - ValVector Values; - - void swap(RenamePassData &RHS) { - std::swap(BB, RHS.BB); - std::swap(Pred, RHS.Pred); - Values.swap(RHS.Values); + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + default: + return false; + + // Lifetime intrinsics don't preclude promoting the memory to a register. + // FIXME: We should use these to promote to undef when outside of a valid + // lifetime. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + DeadInsts.push_back(&II); + return true; } - }; - - /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of - /// load/store instructions in the block that directly load or store an alloca. + } + + // The fallback is that the alloca cannot be promoted. + bool visitInstruction(Instruction &I) { return false; } +}; + +// Data package used by RenamePass() +class RenamePassData { +public: + typedef std::vector<Value *> ValVector; + + RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) + : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + Values.swap(RHS.Values); + } +}; + +/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. +/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// \brief For each instruction that we track, keep the index of the + /// instruction. /// - /// This functionality is important because it avoids scanning large basic - /// blocks multiple times when promoting many allocas in the same block. - class LargeBlockInfo { - /// InstNumbers - For each instruction that we track, keep the index of the - /// instruction. The index starts out as the number of the instruction from - /// the start of the block. - DenseMap<const Instruction *, unsigned> InstNumbers; - public: - - /// isInterestingInstruction - This code only looks at accesses to allocas. - static bool isInterestingInstruction(const Instruction *I) { - return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || - (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); - } - - /// getInstructionIndex - Get or calculate the index of the specified - /// instruction. - unsigned getInstructionIndex(const Instruction *I) { - assert(isInterestingInstruction(I) && - "Not a load/store to/from an alloca?"); - - // If we already have this instruction number, return it. - DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); - if (It != InstNumbers.end()) return It->second; - - // Scan the whole block to get the instruction. This accumulates - // information for every interesting instruction in the block, in order to - // avoid gratuitus rescans. - const BasicBlock *BB = I->getParent(); - unsigned InstNo = 0; - for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); - BBI != E; ++BBI) - if (isInterestingInstruction(BBI)) - InstNumbers[BBI] = InstNo++; - It = InstNumbers.find(I); - - assert(It != InstNumbers.end() && "Didn't insert instruction?"); + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) return It->second; - } - - void deleteValue(const Instruction *I) { - InstNumbers.erase(I); - } - - void clear() { - InstNumbers.clear(); - } - }; - - struct PromoteMem2Reg { - /// Allocas - The alloca instructions being promoted. - /// - std::vector<AllocaInst*> Allocas; - DominatorTree &DT; - DIBuilder *DIB; - - /// AST - An AliasSetTracker object to update. If null, don't update it. - /// - AliasSetTracker *AST; - - /// AllocaLookup - Reverse mapping of Allocas. - /// - DenseMap<AllocaInst*, unsigned> AllocaLookup; - - /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify - /// some Phi nodes as we iterate over it, so it should have deterministic - /// iterators. We could use a MapVector, but since we already maintain a - /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is - /// more efficient (also supports removal). - /// - DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes; - - /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas - /// it corresponds to. - DenseMap<PHINode*, unsigned> PhiToAllocaMap; - - /// PointerAllocaValues - If we are updating an AliasSetTracker, then for - /// each alloca that is of pointer type, we keep track of what to copyValue - /// to the inserted PHI nodes here. - /// - std::vector<Value*> PointerAllocaValues; - - /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare - /// intrinsic that describes it, if any, so that we can convert it to a - /// dbg.value intrinsic if the alloca gets promoted. - SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; - - /// Visited - The set of basic blocks the renamer has already visited. - /// - SmallPtrSet<BasicBlock*, 16> Visited; - - /// BBNumbers - Contains a stable numbering of basic blocks to avoid - /// non-determinstic behavior. - DenseMap<BasicBlock*, unsigned> BBNumbers; - - /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. - DenseMap<DomTreeNode*, unsigned> DomLevels; - - /// BBNumPreds - Lazily compute the number of predecessors a block has. - DenseMap<const BasicBlock*, unsigned> BBNumPreds; - public: - PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, - AliasSetTracker *ast) - : Allocas(A), DT(dt), DIB(0), AST(ast) {} - ~PromoteMem2Reg() { - delete DIB; - } - void run(); + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E; + ++BBI) + if (isInterestingInstruction(BBI)) + InstNumbers[BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. - /// - bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { - return DT.dominates(BB1, BB2); - } + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } - private: - void RemoveFromAllocasList(unsigned &AllocaIdx) { - Allocas[AllocaIdx] = Allocas.back(); - Allocas.pop_back(); - --AllocaIdx; - } + void clear() { InstNumbers.clear(); } +}; + +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector<AllocaInst *> Allocas; + DominatorTree &DT; + DIBuilder DIB; + const DataLayout *DL; + + /// An AliasSetTracker object to update. If null, don't update it. + AliasSetTracker *AST; + + /// Reverse mapping of Allocas. + DenseMap<AllocaInst *, unsigned> AllocaLookup; + + /// \brief The PhiNodes we're adding. + /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap<PHINode *, unsigned> PhiToAllocaMap; + + /// If we are updating an AliasSetTracker, then for each alloca that is of + /// pointer type, we keep track of what to copyValue to the inserted PHI + /// nodes here. + std::vector<Value *> PointerAllocaValues; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + + /// The set of basic blocks the renamer has already visited. + /// + SmallPtrSet<BasicBlock *, 16> Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. + DenseMap<BasicBlock *, unsigned> BBNumbers; + + /// Maps DomTreeNodes to their level in the dominator tree. + DenseMap<DomTreeNode *, unsigned> DomLevels; + + /// Lazily compute the number of predecessors a block has. + DenseMap<const BasicBlock *, unsigned> BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + const DataLayout *DL, AliasSetTracker *AST) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; + return NP - 1; + } - unsigned getNumPreds(const BasicBlock *BB) { - unsigned &NP = BBNumPreds[BB]; - if (NP == 0) - NP = std::distance(pred_begin(BB), pred_end(BB))+1; - return NP-1; + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); +}; + +} // end of anonymous namespace + +/// \brief Walk a small vector of dead instructions and recursively remove them +/// and subsequently dead instructions. +/// +/// This is only valid to call on dead instructions using an alloca which is +/// promotable, as we leverage that assumption to delete them faster. +static void removeDeadInstructions(AllocaInst *AI, + SmallVectorImpl<Instruction *> &DeadInsts) { + while (!DeadInsts.empty()) { + Instruction *I = DeadInsts.pop_back_val(); + + // Don't delete the alloca itself. + if (I == AI) + continue; + + // Note that we open code the deletion algorithm here because we know + // apriori that all of the instructions using an alloca that reaches here + // are trivially dead when their use list becomes empty (The only risk are + // lifetime markers which we specifically want to nuke). By coding it here + // we can skip the triviality test and be more efficient. + // + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; + ++OI) { + Instruction *Op = dyn_cast<Instruction>(*OI); + if (!Op) + continue; + + OI->set(0); + if (!Op->use_empty()) + continue; + + DeadInsts.push_back(Op); } + I->eraseFromParent(); + } +} + +/// \brief Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI, + DominatorTree &DT, + AliasSetTracker *AST) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); - void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, - AllocaInfo &Info); - void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, - const SmallPtrSet<BasicBlock*, 32> &DefBlocks, - SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); - - void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI); - void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI); - - void RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector &IncVals, - std::vector<RenamePassData> &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); - }; - - struct AllocaInfo { - SmallVector<BasicBlock*, 32> DefiningBlocks; - SmallVector<BasicBlock*, 32> UsingBlocks; - - StoreInst *OnlyStore; - BasicBlock *OnlyBlock; - bool OnlyUsedInOneBlock; - - Value *AllocaPointerVal; - DbgDeclareInst *DbgDeclare; - - void clear() { - DefiningBlocks.clear(); - UsingBlocks.clear(); - OnlyStore = 0; - OnlyBlock = 0; - OnlyUsedInOneBlock = true; - AllocaPointerVal = 0; - DbgDeclare = 0; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + Instruction *UserInst = cast<Instruction>(*UI++); + if (!isa<LoadInst>(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; } - - /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our - /// ivars. - void AnalyzeAlloca(AllocaInst *AI) { - clear(); - - // As we scan the uses of the alloca instruction, keep track of stores, - // and decide whether all of the loads and stores to the alloca are within - // the same basic block. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI->getParent()); - AllocaPointerVal = SI->getOperand(0); - OnlyStore = SI; - } else { - LoadInst *LI = cast<LoadInst>(User); - // Otherwise it must be a load instruction, keep track of variable - // reads. - UsingBlocks.push_back(LI->getParent()); - AllocaPointerVal = LI; - } - - if (OnlyUsedInOneBlock) { - if (OnlyBlock == 0) - OnlyBlock = User->getParent(); - else if (OnlyBlock != User->getParent()) - OnlyUsedInOneBlock = false; + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; } + + } else if (LI->getParent() != StoreBB && + !DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; } - - DbgDeclare = FindAllocaDbgDeclare(AI); } - }; - typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. - struct DomTreeNodeCompare { - bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { - return LHS.second < RHS.second; - } - }; -} // end of anonymous namespace - -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { - // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. - - for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE;) { - Instruction *I = cast<Instruction>(*UI); - ++UI; - if (isa<LoadInst>(I) || isa<StoreInst>(I)) + // Record debuginfo for the store and remove the declaration's + // debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); + DDI->eraseFromParent(); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + return true; +} + +namespace { +/// This is a helper predicate used to search by the first element of a pair. +struct StoreIndexSearchPredicate { + bool operator()(const std::pair<unsigned, StoreInst *> &LHS, + const std::pair<unsigned, StoreInst *> &RHS) { + return LHS.first < RHS.first; + } +}; +} + +/// Many allocas are only used within a single basic block. If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return true. This is necessary in cases where, due to control flow, the +/// alloca is potentially undefined on some control flow paths. e.g. code like +/// this is potentially correct: +/// +/// for (...) { if (c) { A = undef; undef = B; } } +/// +/// ... so long as A is not used before undef is set. +static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, + AliasSetTracker *AST) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; + ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + std::sort(StoresByIndex.begin(), StoresByIndex.end(), + StoreIndexSearchPredicate()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + LoadInst *LI = dyn_cast<LoadInst>(*UI++); + if (!LI) continue; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE;) { - Instruction *Inst = cast<Instruction>(*UI); - ++UI; - Inst->eraseFromParent(); - } + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), + StoreIndexSearchPredicate()); + + if (I == StoresByIndex.begin()) + // If there is no store before this load, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // Otherwise, there was a store before this load, the load takes its value. + LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); + + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } - I->eraseFromParent(); + SI->eraseFromParent(); + LBI.deleteValue(SI); } + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); + + ++NumLocalPromoted; } void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); - if (AST) PointerAllocaValues.resize(Allocas.size()); + if (AST) + PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); - AllocaInfo Info; + AllocaInfo Info(DL); LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; - assert(isAllocaPromotable(AI) && - "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + bool Good = Info.analyzeAlloca(*AI); + (void)Good; + assert(Good && "Cannot promote non-promotable alloca!"); + + // Nuke all of the dead instructions. + removeDeadInstructions(AI, Info.DeadInsts); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. - if (AST) AST->deleteValue(AI); + if (AST) + AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed @@ -417,83 +619,31 @@ void PromoteMem2Reg::run() { ++NumDeadAlloca; continue; } - - // Calculate the set of read and write-locations for each alloca. This is - // analogous to finding the 'uses' and 'definitions' of each variable. - Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - RewriteSingleStoreAlloca(AI, Info, LBI); - - // Finally, after the scan, check to see if the store is all that is left. - if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store and remove the declaration's - // debuginfo. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - if (!DIB) - DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); - DDI->eraseFromParent(); - } - // Remove the (now dead) store and alloca. - Info.OnlyStore->eraseFromParent(); - LBI.deleteValue(Info.OnlyStore); - - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); - + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); - ++NumSingleStore; continue; } } - + // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { - PromoteSingleBlockAlloca(AI, Info, LBI); - - // Finally, after the scan, check to see if the stores are all that is - // left. - if (Info.UsingBlocks.empty()) { - - // Remove the (now dead) stores and alloca. - while (!AI->use_empty()) { - StoreInst *SI = cast<StoreInst>(AI->use_back()); - // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); - } - SI->eraseFromParent(); - LBI.deleteValue(SI); - } - - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); - - // The alloca has been processed, move on. - RemoveFromAllocasList(AllocaNum); - - // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - DDI->eraseFromParent(); + promoteSingleBlockAlloca(AI, Info, LBI, AST); - ++NumLocalPromoted; - continue; - } + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + continue; } // If we haven't computed dominator tree levels, do so now. if (DomLevels.empty()) { - SmallVector<DomTreeNode*, 32> Worklist; + SmallVector<DomTreeNode *, 32> Worklist; DomTreeNode *Root = DT.getRootNode(); DomLevels[Root] = 0; @@ -522,10 +672,11 @@ void PromoteMem2Reg::run() { // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; - + // Remember the dbg.declare intrinsic describing this alloca, if any. - if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; - + if (Info.DbgDeclare) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -540,8 +691,7 @@ void PromoteMem2Reg::run() { return; // All of the allocas must have been trivial! LBI.clear(); - - + // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. @@ -562,7 +712,7 @@ void PromoteMem2Reg::run() { // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); - + // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); @@ -575,7 +725,8 @@ void PromoteMem2Reg::run() { // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); - if (AST) AST->deleteValue(A); + if (AST) + AST->deleteValue(A); A->eraseFromParent(); } @@ -591,13 +742,15 @@ void PromoteMem2Reg::run() { bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; - + // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non deterministic order, thus creating // non deterministic def->use chains. - for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = - NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. @@ -613,15 +766,17 @@ void PromoteMem2Reg::run() { ++I; } } - + // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // - for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = - NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; @@ -636,21 +791,20 @@ void PromoteMem2Reg::run() { continue; // Get the preds for BB. - SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); - + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. std::sort(Preds.begin(), Preds.end()); - + // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. - SmallVector<BasicBlock*, 16>::iterator EntIt = - std::lower_bound(Preds.begin(), Preds.end(), - SomePHI->getIncomingBlock(i)); - assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& + SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( + Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); // Remove the entry @@ -670,39 +824,41 @@ void PromoteMem2Reg::run() { SomePHI->addIncoming(UndefVal, Preds[pred]); } } - + NewPhiNodes.clear(); } +/// \brief Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) { -/// ComputeLiveInBlocks - Determine which blocks the value is live in. These -/// are blocks which lead to uses. Knowing this allows us to avoid inserting -/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes -/// would be dead). -void PromoteMem2Reg:: -ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, - const SmallPtrSet<BasicBlock*, 32> &DefBlocks, - SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { - // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. - SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), - Info.UsingBlocks.end()); - + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, // the value isn't really live-in. for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { BasicBlock *BB = LiveInBlockWorklist[i]; - if (!DefBlocks.count(BB)) continue; - + if (!DefBlocks.count(BB)) + continue; + // Okay, this is a block that both uses and defines the value. If the first // reference to the alloca is a def (store), then we know it isn't live-in. - for (BasicBlock::iterator I = BB->begin(); ; ++I) { + for (BasicBlock::iterator I = BB->begin();; ++I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (SI->getOperand(1) != AI) continue; - + if (SI->getOperand(1) != AI) + continue; + // We found a store to the alloca before a load. The alloca is not // actually live-in here. LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); @@ -710,73 +866,86 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, --i, --e; break; } - + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getOperand(0) != AI) continue; - + if (LI->getOperand(0) != AI) + continue; + // Okay, we found a load before a store to the alloca. It is actually // live into this block. break; } } } - + // Now that we have a set of blocks where the phi is live-in, recursively add // their predecessors until we find the full region the value is live. while (!LiveInBlockWorklist.empty()) { BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); - + // The block really is live in here, insert it into the set. If already in // the set, then it has already been processed. if (!LiveInBlocks.insert(BB)) continue; - + // Since the value is live into BB, it is either defined in a predecessor or // live into it to. Add the preds to the worklist unless they are a // defining block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; - + // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; - + // Otherwise it is, add to the worklist. LiveInBlockWorklist.push_back(P); } } } -/// DetermineInsertionPoint - At this point, we're committed to promoting the -/// alloca using IDF's, and the standard SSA construction algorithm. Determine -/// which blocks need phi nodes and see if we can optimize out some work by -/// avoiding insertion of dead phi nodes. +namespace { +typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; + +struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < RHS.second; + } +}; +} // end anonymous namespace + +/// At this point, we're committed to promoting the alloca using IDF's, and the +/// standard SSA construction algorithm. Determine which blocks need phi nodes +/// and see if we can optimize out some work by avoiding insertion of dead phi +/// nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { // Unique the set of defining blocks for efficient lookup. - SmallPtrSet<BasicBlock*, 32> DefBlocks; + SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. - SmallPtrSet<BasicBlock*, 32> LiveInBlocks; + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. - typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + typedef std::priority_queue<DomTreeNodePair, + SmallVector<DomTreeNodePair, 32>, DomTreeNodeCompare> IDFPriorityQueue; IDFPriorityQueue PQ; - for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), - E = DefBlocks.end(); I != E; ++I) { + for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); + I != E; ++I) { if (DomTreeNode *Node = DT.getNode(*I)) PQ.push(std::make_pair(Node, DomLevels[Node])); } - SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; - SmallPtrSet<DomTreeNode*, 32> Visited; - SmallVector<DomTreeNode*, 32> Worklist; + SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; + SmallPtrSet<DomTreeNode *, 32> Visited; + SmallVector<DomTreeNode *, 32> Worklist; while (!PQ.empty()) { DomTreeNodePair RootPair = PQ.top(); PQ.pop(); @@ -836,179 +1005,22 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); } -/// RewriteSingleStoreAlloca - If there is only a single store to this value, -/// replace any loads of it that are directly dominated by the definition with -/// the value stored. -void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, - AllocaInfo &Info, - LargeBlockInfo &LBI) { - StoreInst *OnlyStore = Info.OnlyStore; - bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); - BasicBlock *StoreBB = OnlyStore->getParent(); - int StoreIndex = -1; - - // Clear out UsingBlocks. We will reconstruct it here if needed. - Info.UsingBlocks.clear(); - - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { - Instruction *UserInst = cast<Instruction>(*UI++); - if (!isa<LoadInst>(UserInst)) { - assert(UserInst == OnlyStore && "Should only have load/stores"); - continue; - } - LoadInst *LI = cast<LoadInst>(UserInst); - - // Okay, if we have a load from the alloca, we want to replace it with the - // only value stored to the alloca. We can do this if the value is - // dominated by the store. If not, we use the rest of the mem2reg machinery - // to insert the phi nodes as needed. - if (!StoringGlobalVal) { // Non-instructions are always dominated. - if (LI->getParent() == StoreBB) { - // If we have a use that is in the same block as the store, compare the - // indices of the two instructions to see which one came first. If the - // load came before the store, we can't handle it. - if (StoreIndex == -1) - StoreIndex = LBI.getInstructionIndex(OnlyStore); - - if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { - // Can't handle this load, bail out. - Info.UsingBlocks.push_back(StoreBB); - continue; - } - - } else if (LI->getParent() != StoreBB && - !dominates(StoreBB, LI->getParent())) { - // If the load and store are in different blocks, use BB dominance to - // check their relationships. If the store doesn't dom the use, bail - // out. - Info.UsingBlocks.push_back(LI->getParent()); - continue; - } - } - - // Otherwise, we *can* safely rewrite this load. - Value *ReplVal = OnlyStore->getOperand(0); - // If the replacement value is the load, this must occur in unreachable - // code. - if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); - LI->replaceAllUsesWith(ReplVal); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LI->eraseFromParent(); - LBI.deleteValue(LI); - } -} - -namespace { - -/// StoreIndexSearchPredicate - This is a helper predicate used to search by the -/// first element of a pair. -struct StoreIndexSearchPredicate { - bool operator()(const std::pair<unsigned, StoreInst*> &LHS, - const std::pair<unsigned, StoreInst*> &RHS) { - return LHS.first < RHS.first; - } -}; - -} - -/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic -/// block. If this is the case, avoid traversing the CFG and inserting a lot of -/// potentially useless PHI nodes by just performing a single linear pass over -/// the basic block using the Alloca. -/// -/// If we cannot promote this alloca (because it is read before it is written), -/// return true. This is necessary in cases where, due to control flow, the -/// alloca is potentially undefined on some control flow paths. e.g. code like -/// this is potentially correct: -/// -/// for (...) { if (c) { A = undef; undef = B; } } -/// -/// ... so long as A is not used before undef is set. +/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. /// -void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI) { - // The trickiest case to handle is when we have large blocks. Because of this, - // this code is optimized assuming that large blocks happen. This does not - // significantly pessimize the small block case. This uses LargeBlockInfo to - // make it efficient to get the index of various operations in the block. - - // Clear out UsingBlocks. We will reconstruct it here if needed. - Info.UsingBlocks.clear(); - - // Walk the use-def list of the alloca, getting the locations of all stores. - typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; - StoresByIndexTy StoresByIndex; - - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) - StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); - - // If there are no stores to the alloca, just replace any loads with undef. - if (StoresByIndex.empty()) { - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LBI.deleteValue(LI); - LI->eraseFromParent(); - } - return; - } - - // Sort the stores by their index, making it efficient to do a lookup with a - // binary search. - std::sort(StoresByIndex.begin(), StoresByIndex.end()); - - // Walk all of the loads from this alloca, replacing them with the nearest - // store above them, if any. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { - LoadInst *LI = dyn_cast<LoadInst>(*UI++); - if (!LI) continue; - - unsigned LoadIdx = LBI.getInstructionIndex(LI); - - // Find the nearest store that has a lower than this load. - StoresByIndexTy::iterator I = - std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), - StoreIndexSearchPredicate()); - - // If there is no store before this load, then we can't promote this load. - if (I == StoresByIndex.begin()) { - // Can't handle this load, bail out. - Info.UsingBlocks.push_back(LI->getParent()); - continue; - } - - // Otherwise, there was a store before this load, the load takes its value. - --I; - LI->replaceAllUsesWith(I->second->getOperand(0)); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LI->eraseFromParent(); - LBI.deleteValue(LI); - } -} - -// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific -// Alloca returns true if there wasn't already a phi-node for that variable -// +/// Returns true if there wasn't already a phi-node for that variable bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, unsigned &Version) { // Look up the basic-block in question. PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; // If the BB already has a phi node added for the i'th alloca then we're done! - if (PN) return false; + if (PN) + return false; // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), - Allocas[AllocaNo]->getName() + "." + Twine(Version++), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; @@ -1019,10 +1031,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, return true; } -// RenamePass - Recursively traverse the CFG of the function, renaming loads and -// stores to the allocas which we are promoting. IncomingVals indicates what -// value each Alloca contains on exit from the predecessor block Pred. -// +/// \brief Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, std::vector<RenamePassData> &Worklist) { @@ -1040,48 +1053,49 @@ NextIteration: // inserted by this pass of mem2reg will have the same number of incoming // operands so far. Remember this count. unsigned NewPHINumOperands = APN->getNumOperands(); - - unsigned NumEdges = 0; - for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) - if (*I == BB) - ++NumEdges; + + unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); - + // Add entries for all the phis. BasicBlock::iterator PNI = BB->begin(); do { unsigned AllocaNo = PhiToAllocaMap[APN]; - + // Add N incoming values to the PHI node. for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); - + // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - + // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); - if (APN == 0) break; - + if (APN == 0) + break; + // Verify that it is missing entries. If not, it is not being inserted // by this mem2reg invocation so we want to ignore it. } while (APN->getNumOperands() == NewPHINumOperands); } } - + // Don't revisit blocks. - if (!Visited.insert(BB)) return; + if (!Visited.insert(BB)) + return; - for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { Instruction *I = II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast<LoadInst>(I)) { AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); - if (!Src) continue; - - DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); - if (AI == AllocaLookup.end()) continue; + if (!Src) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; Value *V = IncomingVals[AI->second]; @@ -1094,30 +1108,29 @@ NextIteration: // Delete this instruction and mark the name as the current holder of the // value AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); - if (!Dest) continue; - + if (!Dest) + continue; + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; - + // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); - } + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); BB->getInstList().erase(SI); } } // 'Recurse' to our successors. succ_iterator I = succ_begin(BB), E = succ_end(BB); - if (I == E) return; + if (I == E) + return; // Keep track of the successors so we don't visit the same successor twice - SmallPtrSet<BasicBlock*, 8> VisitedSuccs; + SmallPtrSet<BasicBlock *, 8> VisitedSuccs; // Handle the first successor without using the worklist. VisitedSuccs.insert(*I); @@ -1132,18 +1145,19 @@ NextIteration: goto NextIteration; } -/// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function does -/// not modify the CFG of the function at all. All allocas must be from the -/// same function. -/// -/// If AST is specified, the specified tracker is updated to reflect changes -/// made to the IR. -/// -void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, - DominatorTree &DT, AliasSetTracker *AST) { +bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) { + // We cast away constness because we re-use the non-const analysis that the + // actual promotion routine uses. While it is non-const, it doesn't actually + // mutate anything at this phase, and we discard the non-const results that + // promotion uses to mutate the alloca. + return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI)); +} + +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + const DataLayout *DL, AliasSetTracker *AST) { // If there is nothing to do, bail out... - if (Allocas.empty()) return; + if (Allocas.empty()) + return; - PromoteMem2Reg(Allocas, DT, AST).run(); + PromoteMem2Reg(Allocas, DT, DL, AST).run(); } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 9d90fbe..fc85ef3 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -42,8 +42,6 @@ SSAUpdater::~SSAUpdater() { delete static_cast<AvailableValsTy*>(AV); } -/// Initialize - Reset this object to get ready for a new set of SSA -/// updates with type 'Ty'. PHI nodes get a name based on 'Name'. void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); @@ -53,14 +51,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) { ProtoName = Name; } -/// HasValueForBlock - Return true if the SSAUpdater already has a value for -/// the specified block. bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { return getAvailableVals(AV).count(BB); } -/// AddAvailableValue - Indicate that a rewritten value is available in the -/// specified block with the specified value. void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { assert(ProtoType != 0 && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && @@ -68,8 +62,6 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { getAvailableVals(AV)[BB] = V; } -/// IsEquivalentPHI - Check if PHI has the same incoming value as specified -/// in ValueMapping for each predecessor block. static bool IsEquivalentPHI(PHINode *PHI, DenseMap<BasicBlock*, Value*> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); @@ -86,32 +78,11 @@ static bool IsEquivalentPHI(PHINode *PHI, return true; } -/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is -/// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *Res = GetValueAtEndOfBlockInternal(BB); return Res; } -/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that -/// is live in the middle of the specified block. -/// -/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one -/// important case: if there is a definition of the rewritten value after the -/// 'use' in BB. Consider code like this: -/// -/// X1 = ... -/// SomeBB: -/// use(X) -/// X2 = ... -/// br Cond, SomeBB, OutBB -/// -/// In this case, there are two values (X1 and X2) added to the AvailableVals -/// set by the client of the rewriter, and those values are both live out of -/// their respective blocks. However, the use of X happens in the *middle* of -/// a block. Because of this, we need to insert a new PHI node in SomeBB to -/// merge the appropriate values, and this value isn't live out of the block. -/// Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. @@ -203,8 +174,6 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return InsertedPHI; } -/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, -/// which use their value in the corresponding predecessor. void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); @@ -222,10 +191,6 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } -/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, -/// this version of the method can rewrite uses in the same block as a -/// definition, because it assumes that all uses of a value are below any -/// inserted values. void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); @@ -238,8 +203,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) { U.set(V); } -/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, -/// specialized for SSAUpdater. namespace llvm { template<> class SSAUpdaterTraits<SSAUpdater> { @@ -342,10 +305,9 @@ public: } // End llvm namespace -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. +/// Check to see if AvailableVals has an entry for the specified BB and if so, +/// return it. If not, construct SSA form by first calculating the required +/// placement of PHIs and then inserting new PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); if (Value *V = AvailableVals[BB]) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 681bf9c..c4c1423 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -40,12 +40,14 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <map> #include <set> using namespace llvm; +using namespace PatternMatch; static cl::opt<unsigned> PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), @@ -59,6 +61,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt<bool> +HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store preceeds")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -84,7 +90,6 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout *const TD; - Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -190,94 +195,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } - -/// GetIfCondition - Given a basic block (BB) with two predecessors (and at -/// least one PHI node in it), check to see if the merge at this block is due -/// to an "if condition". If so, return the boolean condition that determines -/// which entry into BB will be taken. Also, return by references the block -/// that will be entered from if the condition is true, and the block that will -/// be entered if the condition is false. -/// -/// This does no checking to see if the true/false blocks have large or unsavory -/// instructions in them. -static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, - BasicBlock *&IfFalse) { - PHINode *SomePHI = cast<PHINode>(BB->begin()); - assert(SomePHI->getNumIncomingValues() == 2 && - "Function can only handle blocks with 2 predecessors!"); - BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); - BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); - - // We can only handle branches. Other control flow will be lowered to - // branches if possible anyway. - BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; - - // Eliminate code duplication by ensuring that Pred1Br is conditional if - // either are. - if (Pred2Br->isConditional()) { - // If both branches are conditional, we don't have an "if statement". In - // reality, we could transform this case, but since the condition will be - // required anyway, we stand no chance of eliminating it, so the xform is - // probably not profitable. - if (Pred1Br->isConditional()) - return 0; - - std::swap(Pred1, Pred2); - std::swap(Pred1Br, Pred2Br); - } - - if (Pred1Br->isConditional()) { - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; - - // If we found a conditional branch predecessor, make sure that it branches - // to BB and Pred2Br. If it doesn't, this isn't an "if statement". - if (Pred1Br->getSuccessor(0) == BB && - Pred1Br->getSuccessor(1) == Pred2) { - IfTrue = Pred1; - IfFalse = Pred2; - } else if (Pred1Br->getSuccessor(0) == Pred2 && - Pred1Br->getSuccessor(1) == BB) { - IfTrue = Pred2; - IfFalse = Pred1; - } else { - // We know that one arm of the conditional goes to BB, so the other must - // go somewhere unrelated, and this must not be an "if statement". - return 0; - } - - return Pred1Br->getCondition(); - } - - // Ok, if we got here, both predecessors end with an unconditional branch to - // BB. Don't panic! If both blocks only have a single (identical) - // predecessor, and THAT is a conditional branch, then we're all ok! - BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; - - // Otherwise, if this is a conditional branch, then we can use it! - BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; - - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); -} - -/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the +/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. static unsigned ComputeSpeculationCost(const User *I) { @@ -428,7 +346,24 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + Value *RHSVal; + ConstantInt *RHSC; + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + // (x & ~2^x) == y --> x == y || x == y|2^x + // This undoes a transformation done by instcombine to fuse 2 compares. + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) { + APInt Not = ~RHSC->getValue(); + if (Not.isPowerOf2()) { + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), C->getValue() | Not)); + UsedICmps++; + return RHSVal; + } + } + UsedICmps++; Vals.push_back(C); return I->getOperand(0); @@ -439,6 +374,13 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + bool hasAdd = + match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC))); + if (hasAdd) + Span = Span.subtract(RHSC->getValue()); + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) @@ -451,7 +393,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; - return I->getOperand(0); + return hasAdd ? RHSVal : I->getOperand(0); } return 0; } @@ -529,9 +471,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || - ICI->getPredicate() == ICmpInst::ICMP_NE) && - GetConstantInt(ICI->getOperand(1), TD)) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. @@ -1079,9 +1019,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; - // If we get here, we can hoist at least one instruction. BasicBlock *BIParent = BI->getParent(); + bool Changed = false; do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1096,6 +1036,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); I2->eraseFromParent(); + Changed = true; I1 = BB1_Itr++; I2 = BB2_Itr++; @@ -1115,7 +1056,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { HoistTerminator: // It may not be possible to hoist an invoke. if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return true; + return Changed; + + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + return Changed; + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + return Changed; + } + } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); @@ -1332,6 +1289,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + return 0; + + StoreInst *SI = dyn_cast<StoreInst>(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + /// \brief Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating @@ -1395,6 +1412,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1410,13 +1429,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from + // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { @@ -1448,18 +1475,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { Value *OrigV = PN->getIncomingValueForBlock(BB); Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. // Skip PHIs which are trivial. if (ThenV == OrigV) continue; HaveRewritablePHIs = true; - ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV); - if (!CE) + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if (!isSafeToSpeculativelyExecute(CE)) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1473,12 +1505,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (!HaveRewritablePHIs) + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<true, NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), llvm::prior(ThenBB->end())); @@ -3073,7 +3117,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); - Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); @@ -3216,7 +3265,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), E = ForwardingNodes.end(); I != E; ++I) { PHINode *Phi = I->first; - SmallVector<int,4> &Indexes = I->second; + SmallVectorImpl<int> &Indexes = I->second; if (Indexes.size() < 2) continue; @@ -3301,11 +3350,12 @@ static Constant *ConstantFold(Instruction *I, /// at the common destination basic block, *CommonDest, for one of the case /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. -static bool GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, - BasicBlock **CommonDest, - SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { +static bool +GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3378,7 +3428,7 @@ namespace { SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, const DataLayout *TD); @@ -3425,7 +3475,7 @@ namespace { SwitchLookupTable::SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, const DataLayout *TD) : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { @@ -3552,7 +3602,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, } /// ShouldBuildLookupTable - Determine whether a lookup table should be built -/// for this switch, based on the number of caes, size of the table and the +/// for this switch, based on the number of cases, size of the table and the /// types of the results. static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 41c207c..bf3442a 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -119,7 +119,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) return 0; D = ConstantInt::get(UseInst->getContext(), - APInt(BitWidth, 1).shl(D->getZExtValue())); + APInt::getOneBitSet(BitWidth, D->getZExtValue())); } FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index c231704..094c201 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization { if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return 0; + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + return EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } @@ -1937,7 +1940,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0; + if (CI->isNoBuiltin()) return 0; return Impl->optimizeCall(CI); } @@ -1947,3 +1950,53 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { } } + +// TODO: +// Additional cases that we need to add to this file: +// +// cbrt: +// * cbrt(expN(X)) -> expN(x/3) +// * cbrt(sqrt(x)) -> pow(x,1/6) +// * cbrt(sqrt(x)) -> pow(x,1/9) +// +// exp, expf, expl: +// * exp(log(x)) -> x +// +// log, logf, logl: +// * log(exp(x)) -> x +// * log(x**y) -> y*log(x) +// * log(exp(y)) -> y*log(e) +// * log(exp2(y)) -> y*log(2) +// * log(exp10(y)) -> y*log(10) +// * log(sqrt(x)) -> 0.5*log(x) +// * log(pow(x,y)) -> y*log(x) +// +// lround, lroundf, lroundl: +// * lround(cnst) -> cnst' +// +// pow, powf, powl: +// * pow(exp(x),y) -> exp(x*y) +// * pow(sqrt(x),y) -> pow(x,y*0.5) +// * pow(pow(x,y),z)-> pow(x,y*z) +// +// round, roundf, roundl: +// * round(cnst) -> cnst' +// +// signbit: +// * signbit(cnst) -> cnst' +// * signbit(nncst) -> 0 (if pstv is a non-negative constant) +// +// sqrt, sqrtf, sqrtl: +// * sqrt(expN(x)) -> expN(x*0.5) +// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) +// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) +// +// strchr: +// * strchr(p, 0) -> strlen(p) +// tan, tanf, tanl: +// * tan(atan(x)) -> x +// +// trunc, truncf, truncl: +// * trunc(cnst) -> cnst' +// +// diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp new file mode 100644 index 0000000..b98cb5b --- /dev/null +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -0,0 +1,225 @@ +//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables, or to instrument some functions or global variables in a specific +// way, based on a user-supplied list. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <string> +#include <utility> + +namespace llvm { + +/// Represents a set of regular expressions. Regular expressions which are +/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all +/// others are represented as a single pipe-separated regex in RegEx. The +/// reason for doing so is efficiency; StringSet is much faster at matching +/// literal strings than Regex. +struct SpecialCaseList::Entry { + StringSet<> Strings; + Regex *RegEx; + + Entry() : RegEx(0) {} + + bool match(StringRef Query) const { + return Strings.count(Query) || (RegEx && RegEx->match(Query)); + } +}; + +SpecialCaseList::SpecialCaseList(const StringRef Path) { + // Validate and open blacklist file. + if (Path.empty()) return; + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + report_fatal_error("Can't open blacklist file: " + Path + ": " + + EC.message()); + } + + init(File.get()); +} + +SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) { + init(MB); +} + +void SpecialCaseList::init(const MemoryBuffer *MB) { + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(MB->getBuffer(), Lines, "\n\r"); + StringMap<StringMap<std::string> > Regexps; + for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I) { + // Ignore empty lines and lines starting with "#" + if (I->empty() || I->startswith("#")) + continue; + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + if (SplitLine.second.empty()) { + // Missing ':' in the line. + report_fatal_error("malformed blacklist line: " + SplitLine.first); + } + + std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); + std::string Regexp = SplitRegexp.first; + StringRef Category = SplitRegexp.second; + + // Backwards compatibility. + if (Prefix == "global-init") { + Prefix = "global"; + Category = "init"; + } else if (Prefix == "global-init-type") { + Prefix = "type"; + Category = "init"; + } else if (Prefix == "global-init-src") { + Prefix = "src"; + Category = "init"; + } + + // See if we can store Regexp in Strings. + if (Regex::isLiteralERE(Regexp)) { + Entries[Prefix][Category].Strings.insert(Regexp); + continue; + } + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string Error; + if (!CheckRE.isValid(Error)) { + report_fatal_error("malformed blacklist regex: " + SplitLine.second + + ": " + Error); + } + + // Add this regexp into the proper group by its prefix. + if (!Regexps[Prefix][Category].empty()) + Regexps[Prefix][Category] += "|"; + Regexps[Prefix][Category] += "^" + Regexp + "$"; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), + E = Regexps.end(); + I != E; ++I) { + for (StringMap<std::string>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); + } + } +} + +SpecialCaseList::~SpecialCaseList() { + for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), + E = Entries.end(); + I != E; ++I) { + for (StringMap<Entry>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + delete II->second.RegEx; + } + } +} + +bool SpecialCaseList::findCategory(const Function &F, + StringRef &Category) const { + return findCategory(*F.getParent(), Category) || + findCategory("fun", F.getName(), Category); +} + +bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { + return isIn(*F.getParent(), Category) || + inSectionCategory("fun", F.getName(), Category); +} + +static StringRef GetGVTypeString(const GlobalVariable &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +bool SpecialCaseList::findCategory(const GlobalVariable &G, + StringRef &Category) const { + return findCategory(*G.getParent(), Category) || + findCategory("global", G.getName(), Category) || + findCategory("type", GetGVTypeString(G), Category); +} + +bool SpecialCaseList::isIn(const GlobalVariable &G, + const StringRef Category) const { + return isIn(*G.getParent(), Category) || + inSectionCategory("global", G.getName(), Category) || + inSectionCategory("type", GetGVTypeString(G), Category); +} + +bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const { + return findCategory("src", M.getModuleIdentifier(), Category); +} + +bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { + return inSectionCategory("src", M.getModuleIdentifier(), Category); +} + +bool SpecialCaseList::findCategory(const StringRef Section, + const StringRef Query, + StringRef &Category) const { + StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + + for (StringMap<Entry>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + if (II->getValue().match(Query)) { + Category = II->first(); + return true; + } + } + + return false; +} + +bool SpecialCaseList::inSectionCategory(const StringRef Section, + const StringRef Query, + const StringRef Category) const { + StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + StringMap<Entry>::const_iterator II = I->second.find(Category); + if (II == I->second.end()) return false; + + return II->getValue().match(Query); +} + +} // namespace llvm diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 5812d46..c3df215 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index b5941bd..457fc80 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -22,14 +22,22 @@ using namespace llvm; // Out of line method to get vtable etc for class. void ValueMapTypeRemapper::anchor() {} +void ValueMaterializer::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper) { + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { ValueToValueMapTy::iterator I = VM.find(V); // If the value already exists in the map, use it. if (I != VM.end() && I->second) return I->second; + // If we have a materializer and it can materialize a value, use that. + if (Materializer) { + if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V))) + return VM[V] = NewV; + } + // Global values do not need to be seeded into the VM if they // are using the identity mapping. if (isa<GlobalValue>(V) || isa<MDString>(V)) @@ -57,14 +65,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = const_cast<Value*>(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); VM[V] = Dummy; // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); if (OP == 0) continue; - Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || @@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, if (Op == 0) Elts.push_back(0); else { - Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) @@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Function *F = - cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper)); + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer)); BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, - Flags, TypeMapper)); + Flags, TypeMapper, Materializer)); return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); } @@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Value *Mapped = 0; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); - Mapped = MapValue(Op, VM, Flags, TypeMapper); + Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); if (Mapped != C) break; } @@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Map the rest of the operands that aren't processed yet. for (++OpNo; OpNo != NumOperands; ++OpNo) Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, - Flags, TypeMapper)); + Flags, TypeMapper, Materializer)); } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) @@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, /// current values into those specified by VMap. /// void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer){ // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, Flags, TypeMapper); + Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. if (V != 0) *op = V; @@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { MDNode *Old = MI->second; - MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer); if (New != Old) I->setMetadata(MI->first, New); } |
