Diffstat (limited to 'lib/Transforms/Utils')
29 files changed, 1899 insertions, 1480 deletions
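Most of the churn below is a single interface migration: utility entry points that used to take a Pass *P and probe it with getAnalysisIfAvailable<> (SplitCriticalEdge, SplitEdge, SplitBlock, SplitBlockPredecessors, MergeBlockIntoPredecessor, FoldSingleEntryPHINodes, formLCSSA, ...) now take the analyses they must keep up to date as explicit pointers, with critical-edge splitting bundling its flags into a CriticalEdgeSplittingOptions struct. A minimal sketch of the new calling convention, based on the signatures in the hunks below and assuming the post-change headers from this tree (the wrapper name is invented for illustration):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Split every critical edge in F, updating only the analyses the caller
// actually has. Null pointers mean "analysis not available"; this replaces
// the old getAnalysisIfAvailable<> probing through a Pass pointer.
static unsigned splitAllEdges(Function &F, DominatorTree *DT, LoopInfo *LI) {
  return SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
}

Passes that still live in the legacy pass manager fetch these pointers themselves, as the updated BreakCriticalEdges::runOnFunction below does with DominatorTreeWrapperPass and LoopInfoWrapperPass.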
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp index cce016a..03c3a80 100644 --- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MathExtras.h" #include <algorithm> namespace llvm { @@ -33,11 +34,6 @@ static inline bool CompareVars(const ASanStackVariableDescription &a, // with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. static const size_t kMinAlignment = 16; -static size_t RoundUpTo(size_t X, size_t RoundTo) { - assert((RoundTo & (RoundTo - 1)) == 0); - return (X + RoundTo - 1) & ~(RoundTo - 1); -} - // The larger the variable Size the larger is the redzone. // The resulting frame size is a multiple of Alignment. static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { @@ -48,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { else if (Size <= 512) Res = Size + 64; else if (Size <= 4096) Res = Size + 128; else Res = Size + 256; - return RoundUpTo(Res, Alignment); + return RoundUpToAlignment(Res, Alignment); } void diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index f8e5af5..820544b 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -167,7 +167,7 @@ bool AddDiscriminators::runOnFunction(Function &F) { bool Changed = false; Module *M = F.getParent(); LLVMContext &Ctx = M->getContext(); - DIBuilder Builder(*M); + DIBuilder Builder(*M, /*AllowUnresolved*/ false); // Traverse all the blocks looking for instructions in different // blocks that are at the same file:line location. diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index e20dc0a..4d24928 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -22,7 +22,6 @@ transforms_utils_SRC_FILES := \ LoopSimplify.cpp \ LoopUnroll.cpp \ LoopUnrollRuntime.cpp \ - LowerExpectIntrinsic.cpp \ LowerInvoke.cpp \ LowerSwitch.cpp \ Mem2Reg.cpp \ diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 983f025..b455257 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -65,16 +65,10 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, + MemoryDependenceAnalysis *MemDep) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = nullptr; - MemoryDependenceAnalysis *MemDep = nullptr; - if (P) { - AA = P->getAnalysisIfAvailable<AliasAnalysis>(); - MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); - } - while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); @@ -113,7 +107,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. 
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { +bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, + LoopInfo *LI, AliasAnalysis *AA, + MemoryDependenceAnalysis *MemDep) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; @@ -149,7 +145,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Begin by getting rid of unneeded PHIs. if (isa<PHINode>(BB->front())) - FoldSingleEntryPHINodes(BB, P); + FoldSingleEntryPHINodes(BB, AA, MemDep); // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -166,28 +162,23 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { PredBB->takeName(BB); // Finally, erase the old block and update dominator info. - if (P) { - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); - if (DomTreeNode *DTN = DT.getNode(BB)) { - DomTreeNode *PredDTN = DT.getNode(PredBB); - SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); - for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), - DE = Children.end(); DI != DE; ++DI) - DT.changeImmediateDominator(*DI, PredDTN); - - DT.eraseNode(BB); - } + if (DT) + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); + for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), + DE = Children.end(); + DI != DE; ++DI) + DT->changeImmediateDominator(*DI, PredDTN); + + DT->eraseNode(BB); + } - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) - LI->removeBlock(BB); + if (LI) + LI->removeBlock(BB); - if (MemoryDependenceAnalysis *MD = - P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) - MD->invalidateCachedPredecessors(); - } - } + if (MemDep) + MemDep->invalidateCachedPredecessors(); BB->eraseFromParent(); return true; @@ -240,12 +231,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { /// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. -BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, + LoopInfo *LI) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. TerminatorInst *LatchTerm = BB->getTerminator(); - if (SplitCriticalEdge(LatchTerm, SuccNum, P)) + if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI) + .setPreserveLCSSA())) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a @@ -255,23 +248,25 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, Succ->begin(), P); + return SplitBlock(Succ, Succ->begin(), DT, LI); } // Otherwise, if BB has a single successor, split it at the bottom of the // block. 
assert(BB->getTerminator()->getNumSuccessors() == 1 && "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), P); + return SplitBlock(BB, BB->getTerminator(), DT, LI); } -unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) { +unsigned +llvm::SplitAllCriticalEdges(Function &F, + const CriticalEdgeSplittingOptions &Options) { unsigned NumBroken = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (SplitCriticalEdge(TI, i, P)) + if (SplitCriticalEdge(TI, i, Options)) ++NumBroken; } return NumBroken; @@ -282,7 +277,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) { /// to a new block. The two blocks are joined by an unconditional branch and /// the loop info is updated. /// -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DominatorTree *DT, LoopInfo *LI) { BasicBlock::iterator SplitIt = SplitPt; while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) ++SplitIt; @@ -290,26 +286,23 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { // The new block lives in whichever loop the old one did. This preserves // LCSSA as well, because we force the split point to be after any PHI nodes. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + if (LI) if (Loop *L = LI->getLoopFor(Old)) - L->addBasicBlockToLoop(New, LI->getBase()); + L->addBasicBlockToLoop(New, *LI); - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); + if (DT) // Old dominates New. New node dominates all other nodes dominated by Old. - if (DomTreeNode *OldNode = DT.getNode(Old)) { + if (DomTreeNode *OldNode = DT->getNode(Old)) { std::vector<DomTreeNode *> Children; for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); I != E; ++I) Children.push_back(*I); - DomTreeNode *NewNode = DT.addNewBlock(New, Old); + DomTreeNode *NewNode = DT->addNewBlock(New, Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) - DT.changeImmediateDominator(*I, NewNode); + DT->changeImmediateDominator(*I, NewNode); } - } return New; } @@ -318,45 +311,46 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { /// analysis information. static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, - Pass *P, bool &HasLoopExit) { - if (!P) return; + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA, bool &HasLoopExit) { + // Update dominator tree if available. + if (DT) + DT->splitBlock(NewBB); + + // The rest of the logic is only relevant for updating the loop structures. + if (!LI) + return; - LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; + Loop *L = LI->getLoopFor(OldBB); // If we need to preserve loop analyses, collect some information about how // this split will affect loops. bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; - if (LI) { - bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); - for (ArrayRef<BasicBlock*>::iterator - i = Preds.begin(), e = Preds.end(); i != e; ++i) { - BasicBlock *Pred = *i; - - // If we need to preserve LCSSA, determine if any of the preds is a loop - // exit. 
- if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Pred)) - if (!PL->contains(OldBB)) - HasLoopExit = true; - - // If we need to preserve LoopInfo, note whether any of the preds crosses - // an interesting loop boundary. - if (!L) continue; - if (L->contains(Pred)) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } + for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end(); + i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) + continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; } - // Update dominator tree if available. - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTWP->getDomTree().splitBlock(NewBB); - - if (!L) return; + // Unless we have a loop for OldBB, nothing else to do here. + if (!L) + return; if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an adjacent @@ -382,9 +376,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI); } else { - L->addBasicBlockToLoop(NewBB, LI->getBase()); + L->addBasicBlockToLoop(NewBB, *LI); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } @@ -393,10 +387,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, /// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, - ArrayRef<BasicBlock*> Preds, BranchInst *BI, - Pass *P, bool HasLoopExit) { + ArrayRef<BasicBlock *> Preds, BranchInst *BI, + AliasAnalysis *AA, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr; SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); @@ -461,11 +454,15 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, } } -/// SplitBlockPredecessors - This method transforms BB by introducing a new -/// basic block into the function, and moving some of the predecessors of BB to -/// be predecessors of the new block. The new predecessors are indicated by the -/// Preds array, which has NumPreds elements in it. The new block is given a -/// suffix of 'Suffix'. +/// SplitBlockPredecessors - This method introduces at least one new basic block +/// into the function and moves some of the predecessors of BB to be +/// predecessors of the new block. The new predecessors are indicated by the +/// Preds array. The new block is given a suffix of 'Suffix'. Returns new basic +/// block to which predecessors from Preds are now pointing. +/// +/// If BB is a landingpad block then additional basicblock might be introduced. +/// It will have suffix of 'Suffix'+".split_lp". +/// See SplitLandingPadPredecessors for more details on this case. 
/// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// LoopInfo, and LCCSA but no other analyses. In particular, it does not @@ -473,8 +470,21 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - ArrayRef<BasicBlock*> Preds, - const char *Suffix, Pass *P) { + ArrayRef<BasicBlock *> Preds, + const char *Suffix, AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + // For the landingpads we need to act a bit differently. + // Delegate this work to the SplitLandingPadPredecessors. + if (BB->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + std::string NewName = std::string(Suffix) + ".split-lp"; + + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), + NewBBs, AA, DT, LI, PreserveLCSSA); + return NewBBs[0]; + } + // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -505,10 +515,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit); return NewBB; } @@ -526,10 +537,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, /// exits). /// void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, - ArrayRef<BasicBlock*> Preds, + ArrayRef<BasicBlock *> Preds, const char *Suffix1, const char *Suffix2, - Pass *P, - SmallVectorImpl<BasicBlock*> &NewBBs) { + SmallVectorImpl<BasicBlock *> &NewBBs, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -552,12 +564,12 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); } - // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector<BasicBlock*, 8> NewBB2Preds; @@ -589,10 +601,11 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, + PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. 
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index eda22cf..7e83c9e 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -18,6 +18,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" @@ -41,14 +42,19 @@ namespace { } bool runOnFunction(Function &F) override { - unsigned N = SplitAllCriticalEdges(F, this); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = + SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); NumBroken += N; return N > 0; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<LoopInfoWrapperPass>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -125,10 +131,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, - Pass *P, bool MergeIdenticalEdges, - bool DontDeleteUselessPhis, - bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; + const CriticalEdgeSplittingOptions &Options) { + if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) + return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -179,29 +184,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). - if (MergeIdenticalEdges) { + if (Options.MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. - DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } - - - // If we don't have a pass object, we can't update anything... - if (!P) return NewBB; - - DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - // If we have nothing to update, just return. + auto *AA = Options.AA; + auto *DT = Options.DT; + auto *LI = Options.LI; if (!DT && !LI) return NewBB; @@ -268,13 +266,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. 
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + DestLoop->addBasicBlockToLoop(NewBB, *LI); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. - TIL->addBasicBlockToLoop(NewBB, LI->getBase()); + TIL->addBasicBlockToLoop(NewBB, *LI); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. - DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + DestLoop->addBasicBlockToLoop(NewBB, *LI); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the @@ -283,19 +281,20 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) - P->addBasicBlockToLoop(NewBB, LI->getBase()); + P->addBasicBlockToLoop(NewBB, *LI); } } + // If TIBB is in a loop and DestBB is outside of that loop, we may need // to update LoopSimplify form and LCSSA form. - if (!TIL->contains(DestBB) && - P->mustPreserveAnalysisID(LoopSimplifyID)) { + if (!TIL->contains(DestBB)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. - if (P->mustPreserveAnalysisID(LCSSAID)) + if (Options.PreserveLCSSA) { createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + } // The only that we can break LoopSimplify form by splitting a critical // edge is if after the split there exists some edge from TIL to DestBB @@ -322,20 +321,12 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (!LoopPreds.empty()) { assert(!DestBB->isLandingPad() && "We don't split edges to landing pads!"); - BasicBlock *NewExitBB = - SplitBlockPredecessors(DestBB, LoopPreds, "split", P); - if (P->mustPreserveAnalysisID(LCSSAID)) + BasicBlock *NewExitBB = SplitBlockPredecessors( + DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA); + if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } - // LCSSA form was updated above for the case where LoopSimplify is - // available, which means that all predecessors of loop exit blocks - // are within the loop. Without LoopSimplify form, it would be - // necessary to insert a new phi. - assert((!P->mustPreserveAnalysisID(LCSSAID) || - P->mustPreserveAnalysisID(LoopSimplifyID)) && - "SplitCriticalEdge doesn't know how to update LCCSA form " - "without LoopSimplify!"); } } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 112d26c..762a83f 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -21,7 +21,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -486,135 +486,3 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, CI->setCallingConv(Fn->getCallingConv()); return CI; } - -SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } - -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, - const TargetLibraryInfo *TLI) { - // We really need DataLayout for later. 
- if (!TD) return false; - - this->CI = CI; - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getParent()->getContext(); - IRBuilder<> B(CI); - - if (Name == "__memcpy_chk") { - // Check if this has the right signature. - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return false; - - if (isFoldable(3, 2, false)) { - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - replaceCall(CI->getArgOperand(0)); - return true; - } - return false; - } - - // Should be similar to memcpy. - if (Name == "__mempcpy_chk") { - return false; - } - - if (Name == "__memmove_chk") { - // Check if this has the right signature. - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return false; - - if (isFoldable(3, 2, false)) { - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - replaceCall(CI->getArgOperand(0)); - return true; - } - return false; - } - - if (Name == "__memset_chk") { - // Check if this has the right signature. - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return false; - - if (isFoldable(3, 2, false)) { - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), - false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - replaceCall(CI->getArgOperand(0)); - return true; - } - return false; - } - - if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { - // Check if this has the right signature. - if (FT->getNumParams() != 3 || - FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(Context)) - return 0; - - - // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our - // st[rp]cpy_chk call which may fail at runtime if the size is too long. - // TODO: It might be nice to get a maximum length out of the possible - // string lengths for varying. - if (isFoldable(2, 1, true)) { - Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, - TLI, Name.substr(2, 6)); - if (!Ret) - return false; - replaceCall(Ret); - return true; - } - return false; - } - - if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { - // Check if this has the right signature. 
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(Context) || - !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return false; - - if (isFoldable(3, 2, false)) { - Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, TLI, - Name.substr(2, 7)); - if (!Ret) - return false; - replaceCall(Ret); - return true; - } - return false; - } - - if (Name == "__strcat_chk") { - return false; - } - - if (Name == "__strncat_chk") { - return false; - } - - return false; -} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 6ce22b1..01e811f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMTransformUtils LoopSimplify.cpp LoopUnroll.cpp LoopUnrollRuntime.cpp - LowerExpectIntrinsic.cpp LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp @@ -37,6 +36,10 @@ add_llvm_library(LLVMTransformUtils UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Utils ) add_dependencies(LLVMTransformUtils intrinsics_gen) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 5c8f20d..09279b6 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -164,14 +164,13 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { // Add an operand to an existing MDNode. The new operand will be added at the // back of the operand list. -static void AddOperand(MDNode *Node, Value *Operand) { - SmallVector<Value*, 16> Operands; - for (unsigned i = 0; i < Node->getNumOperands(); i++) { - Operands.push_back(Node->getOperand(i)); - } - Operands.push_back(Operand); - MDNode *NewNode = MDNode::get(Node->getContext(), Operands); - Node->replaceAllUsesWith(NewNode); +static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) { + SmallVector<Metadata *, 16> NewSPs; + NewSPs.reserve(SPs->getNumOperands() + 1); + for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I) + NewSPs.push_back(SPs->getOperand(I)); + NewSPs.push_back(NewSP); + CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs))); } // Clone the module-level debug info associated with OldFunc. The cloned data @@ -187,7 +186,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, // Ensure that OldFunc appears in the map. // (if it's already there it must point to NewFunc anyway) VMap[OldFunc] = NewFunc; - DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap)); + DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap)); for (DICompileUnit CU : Finder.compile_units()) { DIArray Subprograms(CU.getSubprograms()); @@ -196,7 +195,8 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, // also contain the new one. 
for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { - AddOperand(Subprograms, NewSubprogram); + AddOperand(CU, Subprograms, NewSubprogram); + break; } } } @@ -260,21 +260,36 @@ namespace { const char *NameSuffix; ClonedCodeInfo *CodeInfo; const DataLayout *DL; + CloningDirector *Director; + ValueMapTypeRemapper *TypeMapper; + ValueMaterializer *Materializer; + public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, const char *nameSuffix, ClonedCodeInfo *codeInfo, - const DataLayout *DL) + const DataLayout *DL, + CloningDirector *Director) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL), + Director(Director) { + // These are optional components. The Director may return null. + if (Director) { + TypeMapper = Director->getTypeRemapper(); + Materializer = Director->getValueMaterializer(); + } else { + TypeMapper = nullptr; + Materializer = nullptr; + } } /// CloneBlock - The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone); }; } @@ -282,6 +297,7 @@ namespace { /// CloneBlock - The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ WeakVH &BBEntry = VMap[BB]; @@ -307,21 +323,39 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, const_cast<BasicBlock*>(BB)); VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); } - bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. - for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { + // If the "Director" remaps the instruction, don't clone it. + if (Director) { + CloningDirector::CloningAction Action + = Director->handleInstruction(VMap, II, NewBB); + // If the cloning director says stop, we want to stop everything, not + // just break out of the loop (which would cause the terminator to be + // cloned). The cloning director is responsible for inserting a proper + // terminator into the new basic block in this case. + if (Action == CloningDirector::StopCloningBB) + return; + // If the cloning director says skip, continue to the next instruction. + // In this case, the cloning director is responsible for mapping the + // skipped instruction to some value that is defined in the new + // basic block. + if (Action == CloningDirector::SkipInstruction) + continue; + } + Instruction *NewInst = II->clone(); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into @@ -354,6 +388,18 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; + if (Director) { + CloningDirector::CloningAction Action + = Director->handleInstruction(VMap, OldTI, NewBB); + // If the cloning director says stop, we want to stop everything, not + // just break out of the loop (which would cause the terminator to be + // cloned). The cloning director is responsible for inserting a proper + // terminator into the new basic block in this case. + if (Action == CloningDirector::StopCloningBB) + return; + assert(Action != CloningDirector::SkipInstruction && + "SkipInstruction is not valid for terminators."); + } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -409,39 +455,55 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } } -/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, -/// except that it does some simple constant prop and DCE on the fly. The -/// effect of this is to copy significantly less code in cases where (for -/// example) a function call with constant arguments is inlined, and those -/// constant arguments cause a significant amount of code in the callee to be -/// dead. Since this doesn't produce an exact copy of the input, it can't be -/// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, +/// CloneAndPruneIntoFromInst - This works like CloneAndPruneFunctionInto, except +/// that it does not clone the entire function. Instead it starts at an +/// instruction provided by the caller and copies (and prunes) only the code +/// reachable from that instruction. +void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, + SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const DataLayout *DL, - Instruction *TheCall) { + CloningDirector *Director) { assert(NameSuffix && "NameSuffix cannot be null!"); - + + ValueMapTypeRemapper *TypeMapper = nullptr; + ValueMaterializer *Materializer = nullptr; + + if (Director) { + TypeMapper = Director->getTypeRemapper(); + Materializer = Director->getValueMaterializer(); + } + #ifndef NDEBUG - for (Function::const_arg_iterator II = OldFunc->arg_begin(), - E = OldFunc->arg_end(); II != E; ++II) - assert(VMap.count(II) && "No mapping from source argument specified!"); + // If the cloning starts at the begining of the function, verify that + // the function arguments are mapped. 
+ if (!StartingInst) + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, DL); + NameSuffix, CodeInfo, DL, Director); + const BasicBlock *StartingBB; + if (StartingInst) + StartingBB = StartingInst->getParent(); + else { + StartingBB = &OldFunc->getEntryBlock(); + StartingInst = StartingBB->begin(); + } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; - CloneWorklist.push_back(&OldFunc->getEntryBlock()); + PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, CloneWorklist); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was @@ -470,7 +532,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution @@ -569,7 +632,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other @@ -620,9 +683,30 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Make a final pass over the basic blocks from theh old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]), + for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); } + + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. 
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const DataLayout *DL, + Instruction *TheCall) { + CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), + VMap, ModuleLevelChanges, Returns, NameSuffix, + CodeInfo, DL, nullptr); +} diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index d078c96..fae9ff5 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -109,7 +109,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); if (const Constant *C = I->getAliasee()) - GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap))); + GA->setAliasee(MapValue(C, VMap)); } // And named metadata.... @@ -118,7 +118,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap)); + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); } return New; diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 9972b22..003da58 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -39,6 +39,19 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, F->getEntryBlock().begin()); } + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and create a basic block + // into which the store can be inserted. + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + if (!II->getNormalDest()->getSinglePredecessor()) { + unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest()); + assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(II, SuccNum); + assert(BB && "Unable to split critical edge."); + (void)BB; + } + } + // Change all of the users of the instruction to read from the stack slot. while (!I.use_empty()) { Instruction *U = cast<Instruction>(I.user_back()); @@ -71,7 +84,6 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, } } - // Insert stores of the computed value into the stack slot. We have to be // careful if I is an invoke instruction, because we can't insert the store // AFTER the terminator instruction. @@ -79,27 +91,13 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, if (!isa<TerminatorInst>(I)) { InsertPt = &I; ++InsertPt; + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. } else { InvokeInst &II = cast<InvokeInst>(I); - if (II.getNormalDest()->getSinglePredecessor()) - InsertPt = II.getNormalDest()->getFirstInsertionPt(); - else { - // We cannot demote invoke instructions to the stack if their normal edge - // is critical. Therefore, split the critical edge and insert the store - // in the newly created basic block. 
- unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest()); - TerminatorInst *TI = &cast<TerminatorInst>(I); - assert (isCriticalEdge(TI, SuccNum) && - "Expected a critical edge!"); - BasicBlock *BB = SplitCriticalEdge(TI, SuccNum); - assert (BB && "Unable to split critical edge."); - InsertPt = BB->getFirstInsertionPt(); - } + InsertPt = II.getNormalDest()->getFirstInsertionPt(); } - for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) - /* empty */; // Don't insert before PHI nodes or landingpad instrs. - new StoreInst(&I, Slot, InsertPt); return Slot; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 2d0b7dc..c2ef1ac 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -30,6 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -308,7 +309,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. - SmallVector<const Value *, 16> Queue(MD.begin(), MD.end()); + SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); while (!Queue.empty()) { const MDNode *M = cast<MDNode>(Queue.pop_back_val()); for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) @@ -319,13 +320,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // Now we have a complete set of all metadata in the chains used to specify // the noalias scopes and the lists of those scopes. - SmallVector<MDNode *, 16> DummyNodes; - DenseMap<const MDNode *, TrackingVH<MDNode> > MDMap; + SmallVector<TempMDTuple, 16> DummyNodes; + DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); I != IE; ++I) { - MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None); - DummyNodes.push_back(Dummy); - MDMap[*I] = Dummy; + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[*I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old @@ -333,17 +333,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // node. 
for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); I != IE; ++I) { - SmallVector<Value *, 4> NewOps; + SmallVector<Metadata *, 4> NewOps; for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) { - const Value *V = (*I)->getOperand(i); + const Metadata *V = (*I)->getOperand(i); if (const MDNode *M = dyn_cast<MDNode>(V)) NewOps.push_back(MDMap[M]); else - NewOps.push_back(const_cast<Value *>(V)); + NewOps.push_back(const_cast<Metadata *>(V)); } - MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps), - *TempM = MDMap[*I]; + MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); + MDTuple *TempM = cast<MDTuple>(MDMap[*I]); + assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); } @@ -388,10 +389,6 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { NI->setMetadata(LLVMContext::MD_noalias, M); } } - - // Now that everything has been replaced, delete the dummy nodes. - for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i) - MDNode::deleteTemporary(DummyNodes[i]); } /// AddAliasScopeMetadata - If the inlined function has noalias arguments, then @@ -516,7 +513,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, // need to go through several PHIs to see it, and thus could be // repeated in the Objects list. SmallPtrSet<const Value *, 4> ObjSet; - SmallVector<Value *, 4> Scopes, NoAliases; + SmallVector<Metadata *, 4> Scopes, NoAliases; SmallSetVector<const Argument *, 4> NAPtrArgs; for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { @@ -633,9 +630,10 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { DominatorTree DT; bool DTCalculated = false; - const Function *CalledFunc = CS.getCalledFunction(); - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); I != E; ++I) { + Function *CalledFunc = CS.getCalledFunction(); + for (Function::arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); + I != E; ++I) { unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { if (!DTCalculated) { @@ -647,8 +645,9 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, IFI.DL, IFI.AT, CS.getInstruction(), - &DT) >= Align) + if (getKnownAlignment(Arg, IFI.DL, + &IFI.ACT->getAssumptionCache(*CalledFunc), + CS.getInstruction(), &DT) >= Align) continue; IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg, @@ -748,6 +747,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, PointerType *ArgTy = cast<PointerType>(Arg->getType()); Type *AggTy = ArgTy->getElementType(); + Function *Caller = TheCall->getParent()->getParent(); + // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. @@ -760,8 +761,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. 
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, - IFI.DL, IFI.AT, TheCall) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL, + &IFI.ACT->getAssumptionCache(*Caller), + TheCall) >= ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -778,8 +780,6 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // pointer inside the callee). Align = std::max(Align, ByValAlignment); - Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); @@ -824,20 +824,42 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to -/// recursively update InlinedAtEntry of a DebugLoc. -static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, - const DebugLoc &InlinedAtDL, - LLVMContext &Ctx) { - if (MDNode *IA = DL.getInlinedAt(Ctx)) { - DebugLoc NewInlinedAtDL - = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - NewInlinedAtDL.getAsMDNode(Ctx)); +/// Rebuild the entire inlined-at chain for this instruction so that the top of +/// the chain now is inlined-at the new call site. +static DebugLoc +updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, + LLVMContext &Ctx, + DenseMap<const MDLocation *, MDLocation *> &IANodes) { + SmallVector<MDLocation*, 3> InlinedAtLocations; + MDLocation *Last = InlinedAtNode; + DebugLoc CurInlinedAt = DL; + + // Gather all the inlined-at nodes + while (MDLocation *IA = + cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) { + // Skip any we've already built nodes for + if (MDLocation *Found = IANodes[IA]) { + Last = Found; + break; + } + + InlinedAtLocations.push_back(IA); + CurInlinedAt = DebugLoc::getFromDILocation(IA); + } + + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); + I != E; ++I) { + const MDLocation *MD = *I; + Last = IANodes[MD] = MDLocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); } - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - InlinedAtDL.getAsMDNode(Ctx)); + // And finally create the normal location for this instruction, referring to + // the new inlined-at chain. + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); } /// fixupLineNumbers - Update inlined instructions' line numbers to @@ -848,6 +870,20 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, if (TheCallDL.isUnknown()) return; + auto &Ctx = Fn->getContext(); + auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx)); + + // Create a unique call site, not to be confused with any other call from the + // same location. + InlinedAtNode = MDLocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. 
+ DenseMap<const MDLocation *, MDLocation *> IANodes; + for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { @@ -865,12 +901,19 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI->setDebugLoc(TheCallDL); } else { - BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { LLVMContext &Ctx = BI->getContext(); MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx)); + DVI->setOperand(2, MetadataAsValue::get( + Ctx, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx))); + } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DDI->setOperand(1, MetadataAsValue::get( + Ctx, createInlinedVariable(DDI->getVariable(), + InlinedAt, Ctx))); } } } @@ -1026,8 +1069,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. - if (IFI.AT) - IFI.AT->forgetCachedAssumptions(Caller); + if (IFI.ACT) + IFI.ACT->getAssumptionCache(*Caller).clear(); } // If there are any alloca instructions in the block that used to be the entry @@ -1069,6 +1112,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, FirstNewBlock->getInstList(), AI, I); } + // Move any dbg.declares describing the allocas into the entry basic block. + DIBuilder DIB(*Caller->getParent()); + for (auto &AI : IFI.StaticAllocas) + replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } bool InlinedMustTailCalls = false; @@ -1398,7 +1445,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, IFI.AT)) { + if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, + &IFI.ACT->getAssumptionCache(*Caller))) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 51a3d9c..1cba367 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -61,7 +61,7 @@ static bool isExitBlock(BasicBlock *BB, /// uses. static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, const SmallVectorImpl<BasicBlock *> &ExitBlocks, - PredIteratorCache &PredCache) { + PredIteratorCache &PredCache, LoopInfo *LI) { SmallVector<Use *, 16> UsesToRewrite; BasicBlock *InstBB = Inst.getParent(); @@ -94,6 +94,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, DomTreeNode *DomNode = DT.getNode(DomBB); SmallVector<PHINode *, 16> AddedPHIs; + SmallVector<PHINode *, 8> PostProcessPHIs; SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst.getType(), Inst.getName()); @@ -131,6 +132,18 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); + + // LoopSimplify might fail to simplify some loops (e.g. when indirect + // branches are involved). In such situations, it might happen that an exit + // for Loop L1 is the header of a disjoint Loop L2. 
Thus, when we create + // PHIs in such an exit block, we are also inserting PHIs into L2's header. + // This could break LCSSA form for L2 because these inserted PHIs can also + // have uses outside of L2. Remember all PHIs in such situation as to + // revisit than later on. FIXME: Remove this if indirectbr support into + // LoopSimplify gets improved. + if (auto *OtherLoop = LI->getLoopFor(ExitBB)) + if (!L.contains(OtherLoop)) + PostProcessPHIs.push_back(PN); } // Rewrite all uses outside the loop in terms of the new PHIs we just @@ -157,6 +170,25 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, SSAUpdate.RewriteUse(*UsesToRewrite[i]); } + // Post process PHI instructions that were inserted into another disjoint loop + // and update their exits properly. + for (auto *I : PostProcessPHIs) { + if (I->use_empty()) + continue; + + BasicBlock *PHIBB = I->getParent(); + Loop *OtherLoop = LI->getLoopFor(PHIBB); + SmallVector<BasicBlock *, 8> EBs; + OtherLoop->getExitBlocks(EBs); + if (EBs.empty()) + continue; + + // Recurse and re-process each PHI instruction. FIXME: we should really + // convert this entire thing to a worklist approach where we process a + // vector of instructions... + processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI); + } + // Remove PHI nodes that did not have any uses rewritten. for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { if (AddedPHIs[i]->use_empty()) @@ -180,7 +212,8 @@ blockDominatesAnExit(BasicBlock *BB, return false; } -bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) { +bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE) { bool Changed = false; // Get the set of exiting blocks. @@ -212,7 +245,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) { !isa<PHINode>(I->user_back()))) continue; - Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache); + Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI); } } @@ -228,15 +261,15 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) { } /// Process a loop nest depth first. -bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, +bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; // Recurse depth-first through inner loops. - for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - Changed |= formLCSSARecursively(**LI, DT, SE); + for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I) + Changed |= formLCSSARecursively(**I, DT, LI, SE); - Changed |= formLCSSA(L, DT, SE); + Changed |= formLCSSA(L, DT, LI, SE); return Changed; } @@ -261,7 +294,7 @@ struct LCSSA : public FunctionPass { AU.setPreservesCFG(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); @@ -275,7 +308,7 @@ private: char LCSSA::ID = 0; INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } @@ -285,13 +318,13 @@ char &llvm::LCSSAID = LCSSA::ID; /// Process all loops in the function, inner-most out. 
bool LCSSA::runOnFunction(Function &F) { bool Changed = false; - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= formLCSSARecursively(**I, *DT, SE); + Changed |= formLCSSARecursively(**I, *DT, LI, SE); return Changed; } diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index 88b2ffe..6b2d405 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = TransformUtils parent = Transforms -required_libraries = Analysis Core IPA Support Target +required_libraries = Analysis Core IPA Support diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index c963c51..4830568 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" @@ -110,11 +111,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, } if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { - // If we are switching on a constant, we can convert the switch into a - // single branch instruction! + // If we are switching on a constant, we can convert the switch to an + // unconditional branch. ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); - BasicBlock *TheOnlyDest = SI->getDefaultDest(); - BasicBlock *DefaultDest = TheOnlyDest; + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *TheOnlyDest = DefaultDest; + + // If the default is unreachable, ignore it when searching for TheOnlyDest. + if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) && + SI->getNumCases() > 0) { + TheOnlyDest = SI->case_begin().getCaseSuccessor(); + } // Figure out which case it goes to. for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); @@ -137,7 +144,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SmallVector<uint32_t, 8> Weights; for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; ++MD_i) { - ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(MD_i)); assert(CI); Weights.push_back(CI->getValue().getZExtValue()); } @@ -208,8 +216,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->getDefaultDest()); MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); if (MD && MD->getNumOperands() == 3) { - ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); - ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + ConstantInt *SICase = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); assert(SICase && SIDef); // The TrueWeight should be the weight for the single case of SI. NewBr->setMetadata(LLVMContext::MD_prof, @@ -486,7 +496,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. 
/// -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -522,14 +532,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { if (PredBB == &DestBB->getParent()->getEntryBlock()) DestBB->moveAfter(PredBB); - if (P) { - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); - BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock(); - DT.changeImmediateDominator(DestBB, PredBBIDom); - DT.eraseNode(PredBB); - } + if (DT) { + BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); } // Nuke BB. PredBB->eraseFromParent(); @@ -940,7 +946,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout *DL, - AssumptionTracker *AT, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && @@ -948,7 +954,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AT, CxtI, DT); + computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as @@ -1048,7 +1054,7 @@ static bool isArray(AllocaInst *AI) { /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. bool llvm::LowerDbgDeclare(Function &F) { - DIBuilder DIB(*F.getParent()); + DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); SmallVector<DbgDeclareInst *, 4> Dbgs; for (auto &FI : F) for (BasicBlock::iterator BI : FI) @@ -1091,19 +1097,21 @@ bool llvm::LowerDbgDeclare(Function &F) { /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the /// alloca 'V', if any. 
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) - for (User *U : DebugNode->users()) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) - return DDI; + if (auto *L = LocalAsMetadata::getIfExists(V)) + if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) + for (User *U : MDV->users()) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) + return DDI; return nullptr; } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder) { + DIBuilder &Builder, bool Deref) { DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); if (!DDI) return false; + DebugLoc Loc = DDI->getDebugLoc(); DIVariable DIVar(DDI->getVariable()); DIExpression DIExpr(DDI->getExpression()); assert((!DIVar || DIVar.isVariable()) && @@ -1111,23 +1119,24 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, if (!DIVar) return false; - // Create a copy of the original DIDescriptor for user variable, appending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector<int64_t, 4> NewDIExpr; - if (DIExpr) { - for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) { - NewDIExpr.push_back(DIExpr.getElement(i)); - } + if (Deref) { + // Create a copy of the original DIDescriptor for the user variable, + // prepending a "deref" operation to the list of address elements, as the + // new llvm.dbg.declare will take a value storing the address of the memory + // for the variable, not the alloca itself. + SmallVector<uint64_t, 4> NewDIExpr; + NewDIExpr.push_back(dwarf::DW_OP_deref); + if (DIExpr) + for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) + NewDIExpr.push_back(DIExpr.getElement(i)); + DIExpr = Builder.createExpression(NewDIExpr); } - NewDIExpr.push_back(dwarf::DW_OP_deref); // Insert llvm.dbg.declare in the same basic block as the original alloca, // and remove old llvm.dbg.declare. BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, - Builder.createExpression(NewDIExpr), BB); + Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB) + ->setDebugLoc(Loc); DDI->eraseFromParent(); return true; } @@ -1252,7 +1261,7 @@ static bool markAliveBlocks(BasicBlock *BB, if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { changeToUnreachable(II, true); Changed = true; - } else if (II->doesNotThrow()) { + } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. 
BranchInst::Create(II->getNormalDest(), II); @@ -1326,6 +1335,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); break; case LLVMContext::MD_alias_scope: + K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); + break; case LLVMContext::MD_noalias: K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); break; diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index af0501f..a0f8268 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -44,7 +44,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -113,6 +113,14 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { BasicBlock *Header = L->getHeader(); + // Get analyses that we try to update. + auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>(); + auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -131,15 +139,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Split out the loop pre-header. BasicBlock *PreheaderBB; - if (!Header->isLandingPad()) { - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - PP); - } else { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", - ".split-lp", PP, NewBBs); - PreheaderBB = NewBBs[0]; - } + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + AA, DT, LI, PreserveLCSSA); PreheaderBB->getTerminator()->setDebugLoc( Header->getFirstNonPHI()->getDebugLoc()); @@ -157,7 +158,9 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { /// /// This method is used to split exit blocks that have predecessors outside of /// the loop. 
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { +static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, Pass *PP) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; @@ -172,15 +175,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewExitBB = nullptr; - if (Exit->isLandingPad()) { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Exit, LoopBlocks, - ".loopexit", ".nonloopexit", - PP, NewBBs); - NewExitBB = NewBBs[0]; - } else { - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP); - } + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT, + LI, PreserveLCSSA); DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); @@ -210,11 +208,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// us how to partition the loops. static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, DominatorTree *DT, - AssumptionTracker *AT) { + AssumptionCache *AC) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -254,7 +252,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, Pass *PP, - AssumptionTracker *AT) { + AssumptionCache *AC) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; @@ -263,7 +261,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, assert(!L->getHeader()->isLandingPad() && "Can't insert backedge to landing pad"); - PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AT); + PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -287,9 +285,11 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (SE) SE->forgetLoop(L); + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = - SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP); + BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", + AA, DT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -460,7 +460,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Update Loop Information - we know that this block is now in the current // loop and all parent loops. - L->addBasicBlockToLoop(BEBlock, LI->getBase()); + L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information DT->splitBlock(BEBlock); @@ -476,8 +476,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. 
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, - const DataLayout *DL, AssumptionTracker *AT) { + ScalarEvolution *SE, Pass *PP, const DataLayout *DL, + AssumptionCache *AC) { bool Changed = false; ReprocessLoop: @@ -567,7 +567,7 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, PP)) { + if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { ++NumInserted; Changed = true; } @@ -583,8 +583,8 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, - PP, AT)) { + if (Loop *OuterL = + separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -614,7 +614,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -714,7 +714,7 @@ ReprocessLoop: bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, AliasAnalysis *AA, ScalarEvolution *SE, - const DataLayout *DL, AssumptionTracker *AT) { + const DataLayout *DL, AssumptionCache *AC) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -726,13 +726,12 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, // order. We can use this simple process because loops form a tree. for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { Loop *L2 = Worklist[Idx]; - for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I) - Worklist.push_back(*I); + Worklist.append(L2->begin(), L2->end()); } while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, DL, AT); + SE, PP, DL, AC); return Changed; } @@ -751,19 +750,19 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; const DataLayout *DL; - AssumptionTracker *AT; + AssumptionCache *AC; bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionTracker>(); + AU.addRequired<AssumptionCacheTracker>(); // We need loop information to identify the loops... 
AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); @@ -779,9 +778,9 @@ namespace { char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -795,16 +794,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; AA = getAnalysisIfAvailable<AliasAnalysis>(); - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; - AT = &getAnalysis<AssumptionTracker>(); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AT); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 0e1baa1..accb731 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -19,7 +19,7 @@ #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -154,9 +154,8 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - bool AllowRuntime, unsigned TripMultiple, - LoopInfo *LI, Pass *PP, LPPassManager *LPM, - AssumptionTracker *AT) { + bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, + Pass *PP, LPPassManager *LPM, AssumptionCache *AC) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -312,7 +311,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Tell LI about New. if (*BB == Header) { assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); - L->addBasicBlockToLoop(New, LI->getBase()); + L->addBasicBlockToLoop(New, *LI); } else { // Figure out which loop New is in. 
const Loop *OldLoop = LI->getLoopFor(*BB); @@ -334,7 +333,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (SE) SE->forgetLoop(OldLoop); } - NewLoop->addBasicBlockToLoop(New, LI->getBase()); + NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) @@ -473,7 +472,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. - AT->forgetCachedAssumptions(F); + AC->clear(); DominatorTree *DT = nullptr; if (PP) { @@ -534,7 +533,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (OuterL) { DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AT); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -544,9 +543,32 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, SE); + formLCSSARecursively(*OuterL, *DT, LI, SE); } } return true; } + +/// Given an llvm.loop loop id metadata node, returns the loop hint metadata +/// node with the given name (for example, "llvm.loop.unroll.count"). If no +/// such metadata node exists, then nullptr is returned. +MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (!MD) + continue; + + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + + if (Name.equals(S->getString())) + return MD; + } + return nullptr; +} diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 3d91336..91b688c 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -23,14 +23,17 @@ #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> @@ -55,10 +58,11 @@ STATISTIC(NumRuntimeUnrolled, /// - Branch around the original loop if the trip count is less /// than the unroll factor. 
/// -static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, +static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, - ValueToValueMapTy &VMap, Pass *P) { + ValueToValueMapTy &VMap, AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); @@ -105,23 +109,25 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, } } - // Create a branch around the orignal loop, which is taken if the - // trip count is less than the unroll factor. + // Create a branch around the original loop, which is taken if there are no + // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologEnd->getTerminator(); + + assert(Count != 0 && "nonsensical Count!"); + + // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1) + // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and + // all of the iterations of this loop were executed by the prologue. Note + // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow. Instruction *BrLoopExit = - new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, - ConstantInt::get(TripCount->getType(), Count)); + new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount, - ConstantInt::get(BECount->getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); - if (!Exit->isLandingPad()) { - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); - } else { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", - P, NewBBs); - } + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, + P->mustPreserveAnalysisID(LCSSAID)); // Add the branch to the exit block (around the unrolled loop) BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); InsertPt->eraseFromParent(); @@ -160,9 +166,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, NewBlocks.push_back(NewBB); if (NewLoop) - NewLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { @@ -217,9 +223,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. - SmallVector<Value *, 4> Vals; + SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. - Vals.push_back(nullptr); + MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
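For readers unfamiliar with llvm.loop annotations, the node built across this hunk and the next is the usual self-referential loop ID: operand 0 points back at the node itself, the remaining operands are hint strings, and any pre-existing llvm.loop.unroll.* hints are dropped before llvm.loop.unroll.disable is appended. A minimal sketch of the IR this leaves on the cloned loop's latch branch (value numbers and labels are illustrative):

  br i1 %cond, label %loop.body, label %loop.exit, !llvm.loop !0
  ...
  !0 = !{!0, !1}                        ; operand 0 is the self-reference
  !1 = !{!"llvm.loop.unroll.disable"}   ; hint that stops future unrolling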
@@ -230,17 +236,18 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } - if (!IsUnrollMetadata) Vals.push_back(LoopID->getOperand(i)); + if (!IsUnrollMetadata) + MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); - SmallVector<Value *, 1> DisableOperands; + SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); - Vals.push_back(DisableNode); + MDs.push_back(DisableNode); - MDNode *NewLoopID = MDNode::get(Context, Vals); + MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); @@ -291,23 +298,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Only unroll loops with a computable trip count and the trip count needs // to be an int value (allowing a pointer type is a TODO item) - const SCEV *BECount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) + const SCEV *BECountSC = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECountSC) || + !BECountSC->getType()->isIntegerTy()) return false; - // If BECount is INT_MAX, we can't compute trip-count without overflow. - if (BECount->isAllOnesValue()) - return false; + unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = - SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. - if ((Count & (Count-1)) != 0) + if (!isPowerOf2_32(Count)) + return false; + + // This constraint lets us deal with an overflowing trip count easily; see the + // comment on ModVal below. This check is equivalent to `Log2(Count) <= + // BEWidth`. + if (static_cast<uint64_t>(Count) > (1ULL << BEWidth)) return false; // If this loop is nested, then the loop unroller changes the code in @@ -315,13 +327,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Grab analyses that we preserve. + auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to split the original preheader twice, one for the end of the // prolog code and one for a new loop preheader - BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); - BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); + BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); + BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); // Compute the number of extra iterations required, which is: @@ -329,16 +345,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); + Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), + PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); - // Check if for no extra iterations, then jump to cloned/unrolled loop. - // We have to check that the trip count computation didn't overflow when - // adding one to the backedge taken count. - Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); - Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); - Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); + // If ModVal is zero, we know that either + // 1. there are no iterations to be run in the prologue loop + // OR + // 2. the addition computing TripCount overflowed + // + // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the + // number of iterations that remain to be run in the original loop is a + // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we + // explicitly check this above). For example, with Count == 4: BECount == 6 + // gives TripCount == 7 and %xtraiter == 3, so the prologue runs three + // iterations; BECount == 7 gives TripCount == 8 and %xtraiter == 0, so the + // prologue is skipped entirely. + + Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod"); // Branch to either the extra iterations or the cloned/unrolled loop // We will fix up the true branch label when adding loop body copies @@ -361,10 +384,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; - // If unroll count is 2 and we can't overflow in tripcount computation (which - // is BECount + 1), then we don't need a loop for prologue, and we can unroll - // it. We can be sure that we don't overflow only if tripcount is a constant. - bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount)); + bool UnrollPrologue = Count == 2; // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra @@ -390,8 +410,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Connect the prolog code to the original loop and update the // PHI functions. 
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]); - ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, - LPM->getAsPass()); + ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, + /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass()); NumRuntimeUnrolled++; return true; } diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp deleted file mode 100644 index ff89e74..0000000 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ /dev/null @@ -1,188 +0,0 @@ -//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass lowers the 'expect' intrinsic to LLVM metadata. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "lower-expect-intrinsic" - -STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); - -static cl::opt<uint32_t> -LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), - cl::desc("Weight of the branch likely to be taken (default = 64)")); -static cl::opt<uint32_t> -UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4), - cl::desc("Weight of the branch unlikely to be taken (default = 4)")); - -namespace { - - class LowerExpectIntrinsic : public FunctionPass { - - bool HandleSwitchExpect(SwitchInst *SI); - - bool HandleIfExpect(BranchInst *BI); - - public: - static char ID; - LowerExpectIntrinsic() : FunctionPass(ID) { - initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - }; -} - - -bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { - CallInst *CI = dyn_cast<CallInst>(SI->getCondition()); - if (!CI) - return false; - - Function *Fn = CI->getCalledFunction(); - if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) - return false; - - Value *ArgValue = CI->getArgOperand(0); - ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!ExpectedValue) - return false; - - SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); - unsigned n = SI->getNumCases(); // +1 for default case. - std::vector<uint32_t> Weights(n + 1); - - Weights[0] = Case == SI->case_default() ? LikelyBranchWeight - : UnlikelyBranchWeight; - for (unsigned i = 0; i != n; ++i) - Weights[i + 1] = i == Case.getCaseIndex() ? 
LikelyBranchWeight - : UnlikelyBranchWeight; - - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(CI->getContext()).createBranchWeights(Weights)); - - SI->setCondition(ArgValue); - return true; -} - - -bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { - if (BI->isUnconditional()) - return false; - - // Handle non-optimized IR code like: - // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1) - // %tobool = icmp ne i64 %expval, 0 - // br i1 %tobool, label %if.then, label %if.end - // - // Or the following simpler case: - // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1) - // br i1 %expval, label %if.then, label %if.end - - CallInst *CI; - - ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); - if (!CmpI) { - CI = dyn_cast<CallInst>(BI->getCondition()); - } else { - if (CmpI->getPredicate() != CmpInst::ICMP_NE) - return false; - CI = dyn_cast<CallInst>(CmpI->getOperand(0)); - } - - if (!CI) - return false; - - Function *Fn = CI->getCalledFunction(); - if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) - return false; - - Value *ArgValue = CI->getArgOperand(0); - ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!ExpectedValue) - return false; - - MDBuilder MDB(CI->getContext()); - MDNode *Node; - - // If expect value is equal to 1 it means that we are more likely to take - // branch 0, in other case more likely is branch 1. - if (ExpectedValue->isOne()) - Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); - else - Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); - - BI->setMetadata(LLVMContext::MD_prof, Node); - - if (CmpI) - CmpI->setOperand(0, ArgValue); - else - BI->setCondition(ArgValue); - return true; -} - - -bool LowerExpectIntrinsic::runOnFunction(Function &F) { - for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *BB = I++; - - // Create "block_weights" metadata. - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - if (HandleIfExpect(BI)) - IfHandled++; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (HandleSwitchExpect(SI)) - IfHandled++; - } - - // remove llvm.expect intrinsics. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ) { - CallInst *CI = dyn_cast<CallInst>(BI++); - if (!CI) - continue; - - Function *Fn = CI->getCalledFunction(); - if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { - Value *Exp = CI->getArgOperand(0); - CI->replaceAllUsesWith(Exp); - CI->eraseFromParent(); - } - } - } - - return false; -} - - -char LowerExpectIntrinsic::ID = 0; -INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' " - "Intrinsics", false, false) - -FunctionPass *llvm::createLowerExpectIntrinsicPass() { - return new LowerExpectIntrinsic(); -} diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index a0105c2..b3bdae4 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -32,6 +32,23 @@ using namespace llvm; #define DEBUG_TYPE "lower-switch" namespace { + struct IntRange { + int64_t Low, High; + }; + // Return true iff R is covered by Ranges. + static bool IsInRanges(const IntRange &R, + const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. 
+ auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; + } + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch /// instructions. class LowerSwitch : public FunctionPass { @@ -46,18 +63,16 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreserved("mem2reg"); AU.addPreservedID(LowerInvokePassID); } struct CaseRange { - Constant* Low; - Constant* High; + ConstantInt* Low; + ConstantInt* High; BasicBlock* BB; - CaseRange(Constant *low = nullptr, Constant *high = nullptr, - BasicBlock *bb = nullptr) : - Low(low), High(high), BB(bb) { } + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) + : Low(low), High(high), BB(bb) {} }; typedef std::vector<CaseRange> CaseVector; @@ -68,7 +83,8 @@ namespace { BasicBlock *switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, BasicBlock *Predecessor, - BasicBlock *OrigBlock, BasicBlock *Default); + BasicBlock *OrigBlock, BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges); BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, BasicBlock *Default); unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); @@ -131,25 +147,39 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } -/// \brief Update the first occurrence of the "switch statement" BB in the PHI -/// node with the "new" BB. The other occurrences will be updated by subsequent -/// calls to this function. -/// -/// Switch statements may have more than one incoming edge into the same BB if -/// they all have the same value. When the switch statement is converted these -/// incoming edges are now coming from multiple BBs. -static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB) { - for (BasicBlock::iterator I = SuccBB->begin(), E = SuccBB->getFirstNonPHI(); - I != E; ++I) { +// \brief Update the first occurrence of the "switch statement" BB in the PHI +// node with the "new" BB. The other occurrences will: +// +// 1) Be updated by subsequent calls to this function. Switch statements may +// have more than one outgoing edge into the same BB if they all have the same +// value. When the switch statement is converted these incoming edges are now +// coming from multiple BBs. +// 2) Be removed if subsequent incoming values now share the same case, i.e., +// multiple outgoing edges are condensed into one. This is necessary to keep the +// number of phi values equal to the number of branches to SuccBB. +static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + unsigned NumMergedCases) { + for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI(); + I != IE; ++I) { PHINode *PN = cast<PHINode>(I); // Only update the first occurrence. - for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx) { + unsigned Idx = 0, E = PN->getNumIncomingValues(); + unsigned LocalNumMergedCases = NumMergedCases; + for (; Idx != E; ++Idx) { if (PN->getIncomingBlock(Idx) == OrigBB) { PN->setIncomingBlock(Idx, NewBB); break; } } + + // Remove additional occurrences coming from condensed cases and keep the + // number of incoming values equal to the number of branches to SuccBB. 
+ for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) + if (PN->getIncomingBlock(Idx) == OrigBB) { + PN->removeIncomingValue(Idx); + LocalNumMergedCases--; + } } } @@ -158,12 +188,12 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB) { // LowerBound and UpperBound are used to keep track of the bounds for Val // that have already been checked by a block emitted by one of the previous // calls to switchConvert in the call stack. -BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, - ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, - BasicBlock *OrigBlock, - BasicBlock *Default) { +BasicBlock * +LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { unsigned Size = End - Begin; if (Size == 1) { @@ -172,7 +202,11 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, // emitting the code that checks if the value actually falls in the range // because the bounds already tell us so. if (Begin->Low == LowerBound && Begin->High == UpperBound) { - fixPhis(Begin->BB, OrigBlock, Predecessor); + unsigned NumMergedCases = 0; + if (LowerBound && UpperBound) + NumMergedCases = + UpperBound->getSExtValue() - LowerBound->getSExtValue(); + fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); return Begin->BB; } return newLeafBlock(*Begin, Val, OrigBlock, Default); @@ -186,32 +220,32 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, CaseRange &Pivot = *(Begin + Mid); DEBUG(dbgs() << "Pivot ==> " - << cast<ConstantInt>(Pivot.Low)->getValue() - << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + << Pivot.Low->getValue() + << " -" << Pivot.High->getValue() << "\n"); // NewLowerBound here should never be the integer minimal value. // This is because it is computed from a case range that is never // the smallest, so there is always a case range that has at least // a smaller value. - ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low); - ConstantInt *NewUpperBound; - - // If we don't have a Default block then it means that we can never - // have a value outside of a case range, so set the UpperBound to the highest - // value in the LHS part of the case ranges. - if (Default != nullptr) { - // Because NewLowerBound is never the smallest representable integer - // it is safe here to subtract one. - NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), - NewLowerBound->getValue() - 1); - } else { - CaseItr LastLHS = LHS.begin() + LHS.size() - 1; - NewUpperBound = cast<ConstantInt>(LastLHS->High); + ConstantInt *NewLowerBound = Pivot.Low; + + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + + if (!UnreachableRanges.empty()) { + // Check if the gap between LHS's highest and NewLowerBound is unreachable. 
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1; + int64_t GapHigh = NewLowerBound->getSExtValue() - 1; + IntRange Gap = { GapLow, GapHigh }; + if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges)) + NewUpperBound = LHS.back().High; } DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { - dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue(); + dbgs() << LowerBound->getSExtValue(); } else { dbgs() << "NONE"; } @@ -219,7 +253,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, dbgs() << "RHS Bounds ==> "; dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { - dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n"; + dbgs() << UpperBound->getSExtValue() << "\n"; } else { dbgs() << "NONE\n"; }); @@ -234,10 +268,10 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, NewNode, OrigBlock, - Default); + Default, UnreachableRanges); BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, NewNode, OrigBlock, - Default); + Default, UnreachableRanges); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); @@ -270,11 +304,11 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, Leaf.Low, "SwitchLeaf"); } else { // Make range comparison - if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { + if (Leaf.Low->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, "SwitchLeaf"); - } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { + } else if (Leaf.Low->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, "SwitchLeaf"); @@ -299,8 +333,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode* PN = cast<PHINode>(I); // Remove all but one incoming entry from the cluster - uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - - cast<ConstantInt>(Leaf.Low)->getSExtValue(); + uint64_t Range = Leaf.High->getSExtValue() - + Leaf.Low->getSExtValue(); for (uint64_t j = 0; j < Range; ++j) { PN->removeIncomingValue(OrigBlock); } @@ -328,8 +362,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { if (Cases.size()>=2) for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end();) { - int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); - int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + int64_t nextValue = J->Low->getSExtValue(); + int64_t currentValue = I->High->getSExtValue(); BasicBlock* nextBB = J->BB; BasicBlock* currentBB = I->BB; @@ -362,26 +396,102 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); - // If there is only the default destination, don't bother with the code below. + // If there is only the default destination, just branch. if (!SI->getNumCases()) { - BranchInst::Create(SI->getDefaultDest(), CurBlock); - CurBlock->getInstList().erase(SI); + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); return; } - const bool DefaultIsUnreachable = - Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator()); + // Prepare cases vector. 
+ CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); + (void)numCmps; + + ConstantInt *LowerBound = nullptr; + ConstantInt *UpperBound = nullptr; + std::vector<IntRange> UnreachableRanges; + + if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { + // Make the bounds tightly fitted around the case value range, because we + // know that the value passed to the switch must be exactly one of the case + // values. + assert(!Cases.empty()); + LowerBound = Cases.front().Low; + UpperBound = Cases.back().High; + + DenseMap<BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + BasicBlock *PopSucc = nullptr; + + IntRange R = { INT64_MIN, INT64_MAX }; + UnreachableRanges.push_back(R); + for (const auto &I : Cases) { + int64_t Low = I.Low->getSExtValue(); + int64_t High = I.High->getSExtValue(); + + IntRange &LastRange = UnreachableRanges.back(); + if (LastRange.Low == Low) { + // There is nothing left of the previous range. + UnreachableRanges.pop_back(); + } else { + // Terminate the previous range. + assert(Low > LastRange.Low); + LastRange.High = Low - 1; + } + if (High != INT64_MAX) { + IntRange R = { High + 1, INT64_MAX }; + UnreachableRanges.push_back(R); + } + + // Count popularity. + int64_t N = High - Low + 1; + unsigned &Pop = Popularity[I.BB]; + if ((Pop += N) > MaxPop) { + MaxPop = Pop; + PopSucc = I.BB; + } + } +#ifndef NDEBUG + /* UnreachableRanges should be sorted and the ranges non-adjacent. */ + for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end(); + I != E; ++I) { + assert(I->Low <= I->High); + auto Next = I + 1; + if (Next != E) { + assert(Next->Low > I->High); + } + } +#endif + + // Use the most popular block as the new default, reducing the number of + // cases. + assert(MaxPop > 0 && PopSucc); + Default = PopSucc; + for (CaseItr I = Cases.begin(); I != Cases.end();) { + if (I->BB == PopSucc) + I = Cases.erase(I); + else + ++I; + } + + // If there are no cases left, just branch. + if (Cases.empty()) { + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); + return; + } + } + // Create a new, empty default block so that the new hierarchy of // if-then statements goes to this and the PHI nodes are happy. - // if the default block is set as an unreachable we avoid creating one - // because will never be a valid target. - BasicBlock *NewDefault = nullptr; - if (!DefaultIsUnreachable) { - NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default, NewDefault); - - BranchInst::Create(Default, NewDefault); - } + BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + BranchInst::Create(Default, NewDefault); + // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { @@ -391,40 +501,18 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); } - // Prepare cases vector. - CaseVector Cases; - unsigned numCmps = Clusterify(Cases, SI); - - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". 
Total compares: " << numCmps << "\n"); - DEBUG(dbgs() << "Cases: " << Cases << "\n"); - (void)numCmps; - - ConstantInt *UpperBound = nullptr; - ConstantInt *LowerBound = nullptr; - - // Optimize the condition where Default is an unreachable block. In this case - // we can make the bounds tightly fitted around the case value ranges, - // because we know that the value passed to the switch should always be - // exactly one of the case values. - if (DefaultIsUnreachable) { - CaseItr LastCase = Cases.begin() + Cases.size() - 1; - UpperBound = cast<ConstantInt>(LastCase->High); - LowerBound = cast<ConstantInt>(Cases.begin()->Low); - } BasicBlock *SwitchBlock = switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, OrigBlock, NewDefault); + OrigBlock, OrigBlock, NewDefault, UnreachableRanges); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it. + BasicBlock *OldDefault = SI->getDefaultDest(); CurBlock->getInstList().erase(SI); - pred_iterator PI = pred_begin(Default), E = pred_end(Default); - // If the Default block has no more predecessors just remove it - if (PI == E) { - DeleteDeadBlock(Default); - } + // If the Default block has no more predecessors just remove it. + if (pred_begin(OldDefault) == pred_end(OldDefault)) + DeleteDeadBlock(OldDefault); } diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 477ee7a..00cf4e6 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,7 +14,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -39,7 +39,7 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionTracker>(); + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.setPreservesCFG(); // This is a cluster of orthogonal Transforms @@ -53,7 +53,7 @@ namespace { char PromotePass::ID = 0; INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", false, false) @@ -66,7 +66,8 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - AssumptionTracker *AT = &getAnalysis<AssumptionTracker>(); + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); while (1) { Allocas.clear(); @@ -80,7 +81,7 @@ bool PromotePass::runOnFunction(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, nullptr, AT); + PromoteMemToReg(Allocas, DT, nullptr, &AC); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 1fd7071..dabadb7 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -239,7 +239,7 @@ struct PromoteMem2Reg { AliasSetTracker *AST; /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. 
- AssumptionTracker *AT; + AssumptionCache *AC; /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -282,9 +282,10 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionTracker *AT) + AliasSetTracker *AST, AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), - DIB(*DT.getRoot()->getParent()->getParent()), AST(AST), AT(AT) {} + DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), + AST(AST), AC(AC) {} void run(); @@ -415,7 +416,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + DIBuilder DIB(*AI->getParent()->getParent()->getParent(), + /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); LBI.deleteValue(DDI); @@ -498,7 +500,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + DIBuilder DIB(*AI->getParent()->getParent()->getParent(), + /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); @@ -688,7 +691,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -1068,10 +1071,10 @@ NextIteration: } void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionTracker *AT) { + AliasSetTracker *AST, AssumptionCache *AC) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, AST, AT).run(); + PromoteMem2Reg(Allocas, DT, AST, AC).run(); } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 3fcb789..c057b06 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -150,8 +150,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) - InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + for (const auto &PredValue : PredValues) + InsertedPHI->addIncoming(PredValue.second, PredValue.first); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. @@ -245,8 +245,7 @@ public: // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. 
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) - Preds->push_back(SomePhi->getIncomingBlock(PI)); + Preds->append(SomePhi->block_begin(), SomePhi->block_end()); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Preds->push_back(*PI); @@ -344,20 +343,17 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; - - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; + + for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); - } // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. SmallVector<LoadInst*, 32> LiveInLoads; DenseMap<Value*, Value*> ReplacedLoads; - - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; + + for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; @@ -380,8 +376,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Otherwise, check to see if this block is all loads. bool HasStore = false; - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { - if (isa<StoreInst>(BlockUses[i])) { + for (Instruction *I : BlockUses) { + if (isa<StoreInst>(I)) { HasStore = true; break; } @@ -391,8 +387,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // efficient way to tell which on is first in the block and don't want to // scan large blocks, so just add all loads as live ins. if (!HasStore) { - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) - LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + for (Instruction *I : BlockUses) + LiveInLoads.push_back(cast<LoadInst>(I)); BlockUses.clear(); continue; } @@ -403,8 +399,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. Value *StoredValue = nullptr; - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { + for (Instruction &I : *BB) { + if (LoadInst *L = dyn_cast<LoadInst>(&I)) { // If this is a load from an unrelated pointer, ignore it. if (!isInstInList(L, Insts)) continue; @@ -419,8 +415,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { } continue; } - - if (StoreInst *SI = dyn_cast<StoreInst>(II)) { + + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { // If this is a store to an unrelated pointer, ignore it. if (!isInstInList(SI, Insts)) continue; updateDebugInfo(SI); @@ -438,8 +434,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. 
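// Before the live-in rewrite below, a minimal standalone sketch (plain C++,
// not LLVM code; all names are illustrative) of the per-block rule the scan
// above implements: a load seen before any store in its block reads the
// live-in value, and the last store defines the block's live-out value.
#include <string>
#include <vector>
struct MemEvent { bool IsStore; std::string Val; }; // one load or store
static std::string scanOneBlock(const std::vector<MemEvent> &Block,
                                const std::string &LiveIn,
                                std::vector<bool> &LoadNeedsPHI) {
  std::string Cur = LiveIn;   // value currently held by the promoted slot
  bool SeenStore = false;
  for (const MemEvent &E : Block) {
    if (E.IsStore) {
      Cur = E.Val;            // the last store wins: new live value
      SeenStore = true;
    } else {
      // Loads before the first store are "live-in" (they need a value from
      // the predecessors); later loads forward from the preceding store,
      // mirroring the ReplacedLoads map above.
      LoadNeedsPHI.push_back(!SeenStore);
    }
  }
  return Cur;                 // the live-out value for successor blocks
}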
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { - LoadInst *ALoad = LiveInLoads[i]; + for (LoadInst *ALoad : LiveInLoads) { Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); replaceLoadWithValue(ALoad, NewVal); @@ -454,9 +449,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; - + for (Instruction *User : Insts) { // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 92fd56a..3248a83 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -53,9 +53,13 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" +// Chosen as 2 so as to be cheap, but still to have enough power to fold +// a select, so the "clamp" idiom (of a min followed by a max) will be caught. +// To catch this, we need to fold a compare and a select, hence '2' being the +// minimum reasonable default. static cl::opt<unsigned> -PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), - cl::desc("Control the amount of phi node folding to perform (default = 1)")); +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), + cl::desc("Control the amount of phi node folding to perform (default = 2)")); static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), @@ -73,6 +77,7 @@ STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); +STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -107,7 +112,7 @@ class SimplifyCFGOpt { const TargetTransformInfo &TTI; unsigned BonusInstThreshold; const DataLayout *const DL; - AssumptionTracker *AT; + AssumptionCache *AC; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -127,8 +132,8 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - const DataLayout *DL, AssumptionTracker *AT) - : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AT(AT) {} + const DataLayout *DL, AssumptionCache *AC) + : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {} bool run(BasicBlock *BB); }; } @@ -215,45 +220,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, } /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the -/// given instruction, which is assumed to be safe to speculate. 1 means -/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. 
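// The "clamp" idiom motivating the new phi-node-folding-threshold default
// above, as an assumed standalone illustration (not from the patch):
// flattening each two-entry PHI of a clamp means speculating one compare
// plus one select, i.e. a budget of two basic operations per value.
static int clampBranchy(int X, int Lo, int Hi) {
  if (X < Lo) X = Lo; // a max written as control flow
  if (X > Hi) X = Hi; // a min written as control flow
  return X;
}
static int clampFlat(int X, int Lo, int Hi) {
  X = (X < Lo) ? Lo : X;    // compare + select
  return (X > Hi) ? Hi : X; // compare + select
}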
-static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { +/// given instruction, which is assumed to be safe to speculate. TCC_Free means +/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively +/// expensive. +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, + const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); - switch (Operator::getOpcode(I)) { - default: - // In doubt, be conservative. - return UINT_MAX; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast<GEPOperator>(I)->hasAllConstantIndices()) - return UINT_MAX; - return 1; - case Instruction::ExtractValue: - case Instruction::Load: - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::BitCast: - case Instruction::ExtractElement: - case Instruction::InsertElement: - return 1; // These are all cheap. - - case Instruction::Call: - case Instruction::Select: - return 2; - } + return TTI.getUserCost(I); } - /// DominatesMergePoint - If we have a merge point of an "if condition" as /// accepted above, return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -274,7 +249,8 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction*> *AggressiveInsts, unsigned &CostRemaining, - const DataLayout *DL) { + const DataLayout *DL, + const TargetTransformInfo &TTI) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -310,7 +286,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I, DL); + unsigned Cost = ComputeSpeculationCost(I, DL, TTI); if (Cost > CostRemaining) return false; @@ -320,7 +296,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI)) return false; // Okay, it's safe to do this! Remember this instruction. 
AggressiveInsts->insert(I); @@ -383,10 +359,9 @@ struct ConstantComparesGatherer { } /// Prevent copy - ConstantComparesGatherer(const ConstantComparesGatherer &) - LLVM_DELETED_FUNCTION; + ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & - operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION; + operator=(const ConstantComparesGatherer &) = delete; private: @@ -712,8 +687,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (HasWeight) for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; ++MD_i) { - ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); - assert(CI); + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i)); Weights.push_back(CI->getValue().getZExtValue()); } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { @@ -818,7 +792,7 @@ static void GetBranchWeights(TerminatorInst *TI, MDNode *MD = TI->getMetadata(LLVMContext::MD_prof); assert(MD); for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { - ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i)); + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i)); Weights.push_back(CI->getValue().getZExtValue()); } @@ -1079,7 +1053,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { +static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL, + const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1114,6 +1089,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { if (isa<TerminatorInst>(I1)) goto HoistTerminator; + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) + return Changed; + // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. @@ -1244,14 +1222,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return false; // Gather the PHI nodes in BBEnd. - std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; + SmallDenseMap<std::pair<Value *, Value *>, PHINode *> JointValueMap; Instruction *FirstNonPhiInBBEnd = nullptr; - for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); - I != E; ++I) { + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { if (PHINode *PN = dyn_cast<PHINode>(I)) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); - MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + JointValueMap[std::make_pair(BB1V, BB2V)] = PN; } else { FirstNonPhiInBBEnd = &*I; break; @@ -1260,13 +1237,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { if (!FirstNonPhiInBBEnd) return false; - // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. We scan backward for obviously identical // instructions in an identical order. 
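// A source-level illustration (assumed example, not from the patch) of what
// this backward scan finds and what sinking does with it: identical adds in
// the two predecessors differ in a single operand, so one ".sink" PHI for
// that operand lets the add itself move into the common successor.
static int beforeSinking(bool C, int A, int X, int Y) {
  int R;
  if (C)
    R = A + X; // I1 in BB1
  else
    R = A + Y; // I2 in BB2
  return R;    // the PHI in BBEnd that gives I1/I2 their single use
}
static int afterSinking(bool C, int A, int X, int Y) {
  int T = C ? X : Y; // the PHI created for the differing operand pair
  return A + T;      // the common instruction, now sunk into BBEnd
}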
BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), - RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), - RE2 = BB2->getInstList().rend(); + RE1 = BB1->getInstList().rend(), + RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); // Skip debug info. while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; if (RI1 == RE1) return false; @@ -1289,6 +1266,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; Instruction *I1 = &*RI1, *I2 = &*RI2; + auto InstPair = std::make_pair(I1, I2); // I1 and I2 should have a single use in the same PHI node, and they // perform the same operation. // Cannot move control-flow-involving, volatile loads, vaarg, etc. @@ -1299,11 +1277,11 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || !I1->hasOneUse() || !I2->hasOneUse() || - MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || - MapValueFromBB1ToBB2[I1].first != I2) + !JointValueMap.count(InstPair)) return Changed; // Check whether we should swap the operands of ICmpInst. + // TODO: Add support for commutativity. ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); bool SwapOpnds = false; if (ICmp1 && ICmp2 && @@ -1324,16 +1302,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // with a PHI node after sinking. We only handle the case where there is // a single pair of different operands. Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; - unsigned Op1Idx = 0; + unsigned Op1Idx = ~0U; for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { if (I1->getOperand(I) == I2->getOperand(I)) continue; - // Early exit if we have more-than one pair of different operands or - // the different operand is already in MapValueFromBB1ToBB2. - // Early exit if we need a PHI node to replace a constant. - if (DifferentOp1 || - MapValueFromBB1ToBB2.find(I1->getOperand(I)) != - MapValueFromBB1ToBB2.end() || + // Early exit if we have more than one pair of different operands or if + // we need a PHI node to replace a constant. + if (Op1Idx != ~0U || isa<Constant>(I1->getOperand(I)) || isa<Constant>(I2->getOperand(I))) { // If we can't sink the instructions, undo the swapping. @@ -1346,24 +1321,27 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { DifferentOp2 = I2->getOperand(I); } - // We insert the pair of different operands to MapValueFromBB1ToBB2 and - // remove (I1, I2) from MapValueFromBB1ToBB2. - if (DifferentOp1) { - PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", - BBEnd->begin()); - MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n"); + DEBUG(dbgs() << " " << *I2 << "\n"); + + // We insert the pair of different operands to JointValueMap and + // remove (I1, I2) from JointValueMap. + if (Op1Idx != ~0U) { + auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)]; + if (!NewPN) { + NewPN = + PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", BBEnd->begin()); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } // I1 should use NewPN instead of DifferentOp1.
I1->setOperand(Op1Idx, NewPN); - NewPN->addIncoming(DifferentOp1, BB1); - NewPN->addIncoming(DifferentOp2, BB2); - DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); } - PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; - MapValueFromBB1ToBB2.erase(I1); + PHINode *OldPN = JointValueMap[InstPair]; + JointValueMap.erase(InstPair); - DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); - DEBUG(dbgs() << " " << *I2 << "\n";); // We need to update RE1 and RE2 if we are going to sink the first // instruction in the basic block down. bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); @@ -1489,7 +1467,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// /// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const DataLayout *DL) { + const DataLayout *DL, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa<FCmpInst>(BrCond)) @@ -1538,7 +1517,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. @@ -1597,9 +1577,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; - if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0; + unsigned MaxCost = 2 * PHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1804,7 +1786,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, + const TargetTransformInfo &TTI) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. 
Basically, we // are trying to find the condition that is being branched on, which @@ -1835,6 +1818,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; + MaxCostVal0 *= TargetTransformInfo::TCC_Basic; + MaxCostVal1 *= TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); @@ -1845,9 +1830,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0, DL) || + MaxCostVal0, DL, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1, DL)) + MaxCostVal1, DL, TTI)) return false; } @@ -2036,8 +2021,10 @@ static bool ExtractBranchMetadata(BranchInst *BI, "Looking for probabilities on unconditional branch?"); MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); if (!ProfileData || ProfileData->getNumOperands() != 3) return false; - ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); - ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); + ConstantInt *CITrue = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); + ConstantInt *CIFalse = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); if (!CITrue || !CIFalse) return false; ProbTrue = CITrue->getValue().getZExtValue(); ProbFalse = CIFalse->getValue().getZExtValue(); @@ -2534,17 +2521,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // The weight to CommonDest should be PredCommon * SuccTotal + // PredOther * SuccCommon. // The weight to OtherDest should be PredOther * SuccOther. - SmallVector<uint64_t, 2> NewWeights; - NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + - PredOther * SuccCommon); - NewWeights.push_back(PredOther * SuccOther); + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon, + PredOther * SuccOther}; // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); - SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); PBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()). - createBranchWeights(MDWeights)); + MDBuilder(BI->getContext()) + .createBranchWeights(NewWeights[0], NewWeights[1])); } // OtherDest may have phi nodes. If so, add an entry from PBI's @@ -2718,7 +2703,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, AssumptionTracker *AT) { + unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2751,7 +2736,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } // Ok, the block is reachable from the default dest. 
If the constant we're @@ -2767,7 +2752,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -2947,20 +2932,9 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. - bool InvokeRequiresTableEntry = false; - bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); - - if (II->hasFnAttr(Attribute::UWTable)) { - // Don't remove an `invoke' instruction if the ABI requires an entry into - // the table. - InvokeRequiresTableEntry = true; - continue; - } - SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); - // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2980,14 +2954,11 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); - Changed = true; } - if (!InvokeRequiresTableEntry) - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - - return Changed; + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3018,7 +2989,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { } // If we eliminated all predecessors of the block, delete the block now. - if (pred_begin(BB) == pred_end(BB)) + if (pred_empty(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); @@ -3119,55 +3090,6 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - // If the default value is unreachable, figure out the most popular - // destination and make it the default. - if (SI->getDefaultDest() == BB) { - std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - std::pair<unsigned, unsigned> &entry = - Popularity[i.getCaseSuccessor()]; - if (entry.first == 0) { - entry.first = 1; - entry.second = i.getCaseIndex(); - } else { - entry.first++; - } - } - - // Find the most popular block. - unsigned MaxPop = 0; - unsigned MaxIndex = 0; - BasicBlock *MaxBlock = nullptr; - for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator - I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || - (I->second.first == MaxPop && MaxIndex > I->second.second)) { - MaxPop = I->second.first; - MaxIndex = I->second.second; - MaxBlock = I->first; - } - } - if (MaxBlock) { - // Make this the new default, allowing us to delete any explicit - // edges to it. - SI->setDefaultDest(MaxBlock); - Changed = true; - - // If MaxBlock has phinodes in it, remove MaxPop-1 entries from - // it. 
- if (isa<PHINode>(MaxBlock->begin())) - for (unsigned i = 0; i != MaxPop-1; ++i) - MaxBlock->removePredecessor(SI->getParent()); - - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) - if (i.getCaseSuccessor() == MaxBlock) { - SI->removeCase(i); - --i; --e; - } - } - } } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { // Convert the invoke to a call instruction. This would be a good @@ -3191,7 +3113,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } // If this block is now dead, remove it. - if (pred_begin(BB) == pred_end(BB) && + if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); @@ -3201,70 +3123,122 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { return Changed; } -/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a -/// integer range comparison into a sub, an icmp and a branch. -static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { - assert(SI->getNumCases() > 1 && "Degenerate switch?"); +static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { + assert(Cases.size() >= 1); - // Make sure all cases point to the same destination and gather the values. - SmallVector<ConstantInt *, 16> Cases; - SwitchInst::CaseIt I = SI->case_begin(); - Cases.push_back(I.getCaseValue()); - SwitchInst::CaseIt PrevI = I++; - for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) { - if (PrevI.getCaseSuccessor() != I.getCaseSuccessor()) + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (size_t I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) return false; - Cases.push_back(I.getCaseValue()); } - assert(Cases.size() == SI->getNumCases() && "Not all cases gathered"); + return true; +} - // Sort the case values, then check if they form a range we can transform. - array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); - for (unsigned I = 1, E = Cases.size(); I != E; ++I) { - if (Cases[I-1]->getValue() != Cases[I]->getValue()+1) - return false; +/// Turn a switch with two reachable destinations into an integer range +/// comparison and branch. +static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + + // Partition the cases into two sets with different destinations. + BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; + BasicBlock *DestB = nullptr; + SmallVector <ConstantInt *, 16> CasesA; + SmallVector <ConstantInt *, 16> CasesB; + + for (SwitchInst::CaseIt I : SI->cases()) { + BasicBlock *Dest = I.getCaseSuccessor(); + if (!DestA) DestA = Dest; + if (Dest == DestA) { + CasesA.push_back(I.getCaseValue()); + continue; + } + if (!DestB) DestB = Dest; + if (Dest == DestB) { + CasesB.push_back(I.getCaseValue()); + continue; + } + return false; // More than two destinations. 
} - Constant *Offset = ConstantExpr::getNeg(Cases.back()); - Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()); + assert(DestA && DestB && "Single-destination switch should have been folded."); + assert(DestA != DestB); + assert(DestB != SI->getDefaultDest()); + assert(!CasesB.empty() && "There must be non-default cases."); + assert(!CasesA.empty() || HasDefault); + + // Figure out if one of the sets of cases forms a contiguous range. + SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr; + BasicBlock *ContiguousDest = nullptr; + BasicBlock *OtherDest = nullptr; + if (!CasesA.empty() && CasesAreContiguous(CasesA)) { + ContiguousCases = &CasesA; + ContiguousDest = DestA; + OtherDest = DestB; + } else if (CasesAreContiguous(CasesB)) { + ContiguousCases = &CasesB; + ContiguousDest = DestB; + OtherDest = DestA; + } else + return false; + + // Start building the compare and branch. + + Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); + Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) - Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off"); + Value *Cmp; // If NumCases overflowed, then all possible values jump to the successor. - if (NumCases->isNullValue() && SI->getNumCases() != 0) + if (NumCases->isNullValue() && !ContiguousCases->empty()) Cmp = ConstantInt::getTrue(SI->getContext()); else Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - BranchInst *NewBI = Builder.CreateCondBr( - Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); // Update weight for the newly-created conditional branch. - SmallVector<uint64_t, 8> Weights; - bool HasWeights = HasBranchWeights(SI); - if (HasWeights) { + if (HasBranchWeights(SI)) { + SmallVector<uint64_t, 8> Weights; GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { - // Combine all weights for the cases to be the true weight of NewBI. - // We assume that the sum of all weights for a Terminator can fit into 32 - // bits. - uint32_t NewTrueWeight = 0; - for (unsigned I = 1, E = Weights.size(); I != E; ++I) - NewTrueWeight += (uint32_t)Weights[I]; + uint64_t TrueWeight = 0; + uint64_t FalseWeight = 0; + for (size_t I = 0, E = Weights.size(); I != E; ++I) { + if (SI->getSuccessor(I) == ContiguousDest) + TrueWeight += Weights[I]; + else + FalseWeight += Weights[I]; + } + while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) { + TrueWeight /= 2; + FalseWeight /= 2; + } NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()). - createBranchWeights(NewTrueWeight, - (uint32_t)Weights[0])); + MDBuilder(SI->getContext()).createBranchWeights( + (uint32_t)TrueWeight, (uint32_t)FalseWeight)); } } - // Prune obsolete incoming values off the successor's PHI nodes. - for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); - isa<PHINode>(BBI); ++BBI) { - for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I) + // Prune obsolete incoming values off the successors' PHI nodes.
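// A worked instance of the subtract-and-compare just built (assumed values,
// not from the patch): for contiguous cases {3,4,5} the code above computes
// Offset = -3 and NumCases = 3, so the new branch tests (x - 3) ult 3.
// Unsigned wraparound sends x < 3 to a huge value, so one compare covers
// both ends of the range; the weight update above likewise re-partitions the
// old case weights by destination and halves both until they fit in uint32_t.
static bool inContiguousRange(unsigned X) {
  return X - 3u < 3u; // true exactly for X in {3,4,5}
}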
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = ContiguousCases->size(); + if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); + if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } + + // Drop the switch. SI->eraseFromParent(); return true; @@ -3273,11 +3247,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch /// and use it to remove dead cases. static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL, - AssumptionTracker *AT) { + AssumptionCache *AC) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AT, SI); + computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); // Gather dead cases. SmallVector<ConstantInt*, 8> DeadCases; @@ -3484,6 +3458,21 @@ GetCaseResults(SwitchInst *SI, continue; } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { // Instruction is side-effect free and constant. + + // If the instruction has uses outside this block or a phi node slot for + // the block, it is not safe to bypass the instruction since it would then + // no longer dominate all its uses. + for (auto &Use : I->uses()) { + User *User = Use.getUser(); + if (Instruction *I = dyn_cast<Instruction>(User)) + if (I->getParent() == CaseDest) + continue; + if (PHINode *Phi = dyn_cast<PHINode>(User)) + if (Phi->getIncomingBlock(Use) == CaseDest) + continue; + return false; + } + ConstantPool.insert(std::make_pair(I, C)); } else { break; @@ -3509,12 +3498,6 @@ GetCaseResults(SwitchInst *SI, if (!ConstVal) return false; - // Note: If the constant comes from constant-propagating the case value - // through the CaseDest basic block, it will be safe to remove the - // instructions in that block. They cannot be used (except in the phi nodes - // we visit) outside CaseDest, because that block does not dominate its - // successor. If it did, we would not be in this phi node. - // Be conservative about which kinds of constants we support. if (!ValidLookupTableConstant(ConstVal)) return false; @@ -3655,7 +3638,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout *DL, AssumptionTracker *AT) { + const DataLayout *DL, AssumptionCache *AC) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; @@ -3982,6 +3965,89 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, return SI->getNumCases() * 10 >= TableSize * 4; } +/// Try to reuse the switch table index compare. Following pattern: +/// \code +/// if (idx < tablesize) +/// r = table[idx]; // table does not contain default_value +/// else +/// r = default_value; +/// if (r != default_value) +/// ... 
+/// \endcode +/// Is optimized to: +/// \code +/// cond = idx < tablesize; +/// if (cond) +/// r = table[idx]; +/// else +/// r = default_value; +/// if (cond) +/// ... +/// \endcode +/// Jump threading will then eliminate the second if(cond). +static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, + BranchInst *RangeCheckBranch, Constant *DefaultValue, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values) { + + ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); + if (!CmpInst) + return; + + // We require that the compare is in the same block as the phi so that jump + // threading can do its work afterwards. + if (CmpInst->getParent() != PhiBlock) + return; + + Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1)); + if (!CmpOp1) + return; + + Value *RangeCmp = RangeCheckBranch->getCondition(); + Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); + Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); + + // Check if the compare with the default value is constant true or false. + Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + DefaultValue, CmpOp1, true); + if (DefaultConst != TrueConst && DefaultConst != FalseConst) + return; + + // Check if the compare with the case values is distinct from the default + // compare result. + for (auto ValuePair : Values) { + Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + ValuePair.second, CmpOp1, true); + if (!CaseConst || CaseConst == DefaultConst) + return; + assert((CaseConst == TrueConst || CaseConst == FalseConst) && + "Expect true or false as compare result."); + } + + // Check if the branch instruction dominates the phi node. It's a simple + // dominance check, but sufficient for our needs. + // Although this check is invariant in the calling loops, it's better to do it + // at this late stage. Practically we do it at most once for a switch. + BasicBlock *BranchBlock = RangeCheckBranch->getParent(); + for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) { + BasicBlock *Pred = *PI; + if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) + return; + } + + if (DefaultConst == FalseConst) { + // The compare yields the same result. We can replace it. + CmpInst->replaceAllUsesWith(RangeCmp); + ++NumTableCmpReuses; + } else { + // The compare yields the same result, just inverted. We can replace it. + Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp, + ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", + RangeCheckBranch); + CmpInst->replaceAllUsesWith(InvertedTableCmp); + ++NumTableCmpReuses; + } +} + /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. @@ -4058,11 +4124,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. 
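// Before the default-result handling below, a source-level sketch (assumed
// example) of the lookup-table shape being built, together with the compare
// reuse that reuseTableCompare performs:
static int tableWithReusedCompare(unsigned X) {
  static const int Table[3] = {7, 9, 3}; // no entry equals the default, 1
  bool InBounds = X < 3;                 // the RangeCheckBranch condition
  int R = InBounds ? Table[X] : 1;       // 1 is the default result
  // Since no table entry equals the default, "R != 1" is true exactly when
  // InBounds is true; replacing it with InBounds lets jump threading remove
  // the second branch.
  return (R != 1) ? R : -1;
}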
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - bool HasDefaultResults = false; - if (TableHasHoles) { - HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL); - } bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { @@ -4102,21 +4165,24 @@ static bool SwitchToLookupTable(SwitchInst *SI, "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); - // If we have a fully covered lookup table, unconditionally branch to the - // lookup table BB. Otherwise, check if the condition value is within the case - // range. If it is so, branch to the new BB. Otherwise branch to SI's default - // destination. - const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; - if (GeneratingCoveredLookupTable) { + // If the default destination is unreachable, or if the lookup table covers + // all values of the conditional variable, branch directly to the lookup table + // BB. Otherwise, check that the condition is within the case range. + const bool DefaultIsReachable = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); + BranchInst *RangeCheckBranch = nullptr; + + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, // do not delete PHINodes here. SI->getDefaultDest()->removePredecessor(SI->getParent(), - true/*DontDeleteUselessPHIs*/); + /*DontDeleteUselessPHIs=*/true); } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); } // Populate the BB that does the lookups. @@ -4167,11 +4233,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I]; + const ResultListTy &ResultList = ResultLists[PHI]; // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], - DV, DL); + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -4184,6 +4250,16 @@ static bool SwitchToLookupTable(SwitchInst *SI, break; } + // Do a small peephole optimization: re-use the switch table compare if + // possible. + if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { + BasicBlock *PhiBlock = PHI->getParent(); + // Search for compare instructions which use the phi. + for (auto *User : PHI->users()) { + reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); + } + } + PHI->addIncoming(Result, LookupBB); } @@ -4214,12 +4290,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. 
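// A minimal model (assumed 2-bit condition; the mask stands in for the
// narrow integer type) of the covered case above, where every value of the
// condition hits the table and the range check is omitted entirely:
static int coveredLookup(unsigned TwoBitX) {
  static const int Table[4] = {4, 8, 15, 16};
  return Table[TwoBitX & 3]; // every possible 2-bit value indexes in bounds
}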
if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -4229,25 +4305,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; // Remove unreachable cases. - if (EliminateDeadSwitchCases(SI, DL, AT)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + if (EliminateDeadSwitchCases(SI, DL, AC)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; - if (SwitchToSelect(SI, Builder, DL, AT)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + if (SwitchToSelect(SI, Builder, DL, AC)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; if (SwitchToLookupTable(SI, Builder, TTI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; return false; } @@ -4284,7 +4360,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } return Changed; } @@ -4309,7 +4385,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ ; if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, - BonusInstThreshold, DL, AT)) + BonusInstThreshold, DL, AC)) return true; } @@ -4318,7 +4394,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; return false; } @@ -4333,7 +4409,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. 
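// The recurring "return SimplifyCFG(...) | true;" pattern in this function
// uses bitwise '|' deliberately: unlike '||' it does not short-circuit, so
// the recursive cleanup always runs while the caller still reports that a
// change was made. A minimal standalone model (illustrative names):
static bool didCleanup = false;
static bool cleanup() { didCleanup = true; return false; }
static bool simplifyAndReport() {
  return cleanup() | true; // cleanup() is evaluated even though the
                           // result of the '|' is always true
}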
@@ -4343,14 +4419,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } } @@ -4362,7 +4438,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -4370,16 +4446,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + if (HoistThenElseCodeToIf(BI, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -4387,8 +4463,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } // If this is a branch on a phi node in the current block, thread control @@ -4396,14 +4472,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; // Scan predecessor blocks for conditional branches. 
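// Before the predecessor scan below, an assumed source-level picture of the
// triangle SpeculativelyExecuteBB just handled: the 'then' block has a
// single predecessor and falls through to the join, so its cheap,
// side-effect-free body can be hoisted and the PHI becomes a select.
static int beforeSpeculation(bool C, int A) {
  int R = A;
  if (C)
    R = A * 2; // ThenBB: safe to execute unconditionally
  return R;    // EndBB: phi(A, A*2)
}
static int afterSpeculation(bool C, int A) {
  int T = A * 2;    // speculated above the branch
  return C ? T : A; // the PHI folded to a select
}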
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; return false; } @@ -4484,7 +4560,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Remove basic blocks that have no predecessors (except the entry block)... // or that just have themself as a predecessor. These are unreachable. - if ((pred_begin(BB) == pred_end(BB) && + if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { DEBUG(dbgs() << "Removing BB: \n" << *BB); @@ -4515,7 +4591,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, DL); + Changed |= FoldTwoEntryPHINode(PN, DL, TTI); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4547,7 +4623,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, - const DataLayout *DL, AssumptionTracker *AT) { - return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AT).run(BB); + unsigned BonusInstThreshold, const DataLayout *DL, + AssumptionCache *AC) { + return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index a4fdd55..6a5d885 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -48,22 +48,15 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; bool Changed; public: - SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) : - L(Loop), - LI(LPM->getAnalysisIfAvailable<LoopInfo>()), - SE(SE), - DeadInsts(Dead), - Changed(false) { - DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI, + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) + : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -80,6 +73,7 @@ namespace { void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); + bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); Instruction *splitOverflowIntrinsic(Instruction *IVUser, const DominatorTree *DT); @@ -271,6 +265,107 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return true; } +/// Annotate BO with nsw / nuw if it provably does not signed-overflow / +/// unsigned-overflow. Returns true if anything changed, false otherwise. 
+bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, + Value *IVOperand) { + + // Currently we only handle instructions of the form "add <indvar> <value>" + unsigned Op = BO->getOpcode(); + if (Op != Instruction::Add) + return false; + + // If BO is already both nuw and nsw then there is nothing left to do + if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) + return false; + + IntegerType *IT = cast<IntegerType>(IVOperand->getType()); + Value *OtherOperand = nullptr; + if (BO->getOperand(0) == IVOperand) { + OtherOperand = BO->getOperand(1); + } else { + assert(BO->getOperand(1) == IVOperand && "only other use!"); + OtherOperand = BO->getOperand(0); + } + + bool Changed = false; + const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand); + if (OtherOpSCEV == SE->getCouldNotCompute()) + return false; + + const SCEV *IVOpSCEV = SE->getSCEV(IVOperand); + const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0); + + if (!BO->hasNoSignedWrap()) { + // Upgrade the add to an "add nsw" if we can prove that it will never + // sign-overflow or sign-underflow. + + const SCEV *SignedMax = + SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth())); + const SCEV *SignedMin = + SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth())); + + // The addition "IVOperand + OtherOp" does not sign-overflow if the result + // is sign-representable in 2's complement in the given bit-width. + // + // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp, + // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp]. + // Everything in [SignedMin, SignedMax + OtherOp] is representable since + // SignedMax + OtherOp is at least -1. + // + // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax - + // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax]. + // Everything in [SignedMin + OtherOp, SignedMax] is representable since + // SignedMin + OtherOp is at most -1. + // + // It follows that for all values of IVOperand in [SignedMin - smin(0, + // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is + // representable (i.e. there is no sign-overflow). + + const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV); + const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta); + + bool NeverSignedOverflows = + SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit); + + if (NeverSignedOverflows) { + const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV); + const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta); + + bool NeverSignedUnderflows = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit); + if (NeverSignedUnderflows) { + BO->setHasNoSignedWrap(true); + Changed = true; + } + } + } + + if (!BO->hasNoUnsignedWrap()) { + // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can + // prove that it will never unsigned-overflow (i.e. the result will always + // be representable in the given bit-width). + // + // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it + // does not produce a carry. "IVOperand + OtherOp" produces no carry iff + // IVOperand ULE (UnsignedMax - OtherOp). 
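// A worked instance of both bounds for an assumed i8 operand (the code below
// derives the unsigned one symbolically through SCEV): with OtherOp = 3,
// smax(0, 3) = 3 and smin(0, 3) = 0, so "IV + 3" is nsw for IV in
// [-128 - 0, 127 - 3] = [-128, 124], and nuw (no carry) exactly when
// IV <= 255 - 3 = 252.
static_assert(127 - 3 == 124, "i8 nsw upper limit: SignedMax - smax(0, OtherOp)");
static_assert(-128 - 0 == -128, "i8 nsw lower limit: SignedMin - smin(0, OtherOp)");
static_assert(255 - 3 == 252, "i8 nuw limit: UnsignedMax - OtherOp");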
+ + const SCEV *UnsignedMax = + SE->getConstant(APInt::getMaxValue(IT->getBitWidth())); + const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV); + + bool NeverUnsignedOverflows = + SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit); + + if (NeverUnsignedOverflows) { + BO->setHasNoUnsignedWrap(true); + Changed = true; + } + } + + return Changed; +} + /// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow /// analysis and optimization. /// @@ -430,6 +525,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { pushIVUsers(IVOperand, Simplified, SimpleIVUsers); continue; } + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { + if (isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) { + // re-queue uses of the now modified binary operator and fall + // through to the checks that remain. + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + } + } + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); if (V && Cast) { V->visitCast(Cast); @@ -450,8 +555,8 @@ void IVVisitor::anchor() { } bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) { - LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 5632095..55a4455 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -18,14 +18,14 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -42,8 +42,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<AssumptionTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } /// runOnFunction - Remove instructions that simplify. @@ -53,8 +53,10 @@ namespace { const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); - AssumptionTracker *AT = &getAnalysis<AssumptionTracker>(); + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + AssumptionCache *AC = + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -71,7 +73,7 @@ namespace { continue; // Don't waste time simplifying unused instructions. 
if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); @@ -104,8 +106,8 @@ namespace { char InstSimplifier::ID = 0; INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index a39f128..fb1d83f 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,7 +30,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; @@ -116,207 +116,68 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } -//===----------------------------------------------------------------------===// -// Fortified Library Call Optimizations -//===----------------------------------------------------------------------===// - -static bool isFortifiedCallFoldable(CallInst *CI, unsigned SizeCIOp, unsigned SizeArgOp, - bool isString) { - if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) - return true; - if (ConstantInt *SizeCI = - dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { - if (SizeCI->isAllOnesValue()) - return true; - if (isString) { - uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); - // If the length is 0 we don't know how long it is and so we can't - // remove the check. - if (Len == 0) - return false; - return SizeCI->getZExtValue() >= Len; - } - if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp))) - return SizeCI->getZExtValue() >= Arg->getZExtValue(); - } - return false; -} - -Value *LibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getContext(); - - // Check if this has the right signature. - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != DL->getIntPtrType(Context) || - FT->getParamType(3) != DL->getIntPtrType(Context)) - return nullptr; - - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getContext(); - - // Check if this has the right signature. 
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != DL->getIntPtrType(Context) || - FT->getParamType(3) != DL->getIntPtrType(Context)) - return nullptr; - - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getContext(); - - // Check if this has the right signature. - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != DL->getIntPtrType(Context) || - FT->getParamType(3) != DL->getIntPtrType(Context)) - return nullptr; - - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getContext(); - - // Check if this has the right signature. - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != DL->getIntPtrType(Context)) - return nullptr; - - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // __strcpy_chk(x,x) -> x - return Src; - - // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain strcpy. Otherwise we'll keep our - // strcpy_chk call which may fail at runtime if the size is too long. - // TODO: It might be nice to get a maximum length out of the possible - // string lengths for varying. - if (isFortifiedCallFoldable(CI, 2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); - return Ret; - } else { - // Maybe we can stil fold __strcpy_chk to __memcpy_chk. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; - - // This optimization require DataLayout. - if (!DL) - return nullptr; - - Value *Ret = EmitMemCpyChk( - Dst, Src, ConstantInt::get(DL->getIntPtrType(Context), Len), - CI->getArgOperand(2), B, DL, TLI); - return Ret; - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeStpCpyChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - FunctionType *FT = Callee->getFunctionType(); - LLVMContext &Context = CI->getContext(); - - // Check if this has the right signature. - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) - return nullptr; +/// \brief Returns whether \p F matches the signature expected for the +/// string/memory copying library function \p Func. 
+/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
+/// Their fortified (_chk) counterparts are also accepted.
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
+                                            const DataLayout *DL) {
+  FunctionType *FT = F->getFunctionType();
+  LLVMContext &Context = F->getContext();
+  Type *PCharTy = Type::getInt8PtrTy(Context);
+  Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+  unsigned NumParams = FT->getNumParams();

-  Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
-  if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
-    Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
-  }
-
-  // If a) we don't have any length information, or b) we know this will
-  // fit then just lower to a plain stpcpy. Otherwise we'll keep our
-  // stpcpy_chk call which may fail at runtime if the size is too long.
-  // TODO: It might be nice to get a maximum length out of the possible
-  // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
-    Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
-    return Ret;
-  } else {
-    // Maybe we can stil fold __stpcpy_chk to __memcpy_chk.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0)
-      return nullptr;
-
-    // This optimization require DataLayout.
-    if (!DL)
-      return nullptr;
-
-    Type *PT = FT->getParamType(0);
-    Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
-    Value *DstEnd =
-        B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
-    if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
-      return nullptr;
-    return DstEnd;
-  }
-  return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
-  FunctionType *FT = Callee->getFunctionType();
-  LLVMContext &Context = CI->getContext();
-
-  // Check if this has the right signature.
-  if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
-      FT->getParamType(0) != FT->getParamType(1) ||
-      FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
-      !FT->getParamType(2)->isIntegerTy() ||
-      FT->getParamType(3) != DL->getIntPtrType(Context))
-    return nullptr;
+  // All string libfuncs return the same type as the first parameter.
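+  // (For example, strcpy and memcpy both return their destination pointer,
+  // so the return type must match the first parameter's type.)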
+ if (FT->getReturnType() != FT->getParamType(0)) + return false; - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Ret = - EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7)); - return Ret; - } - return nullptr; + switch (Func) { + default: + llvm_unreachable("Can't check signature for non-string-copy libfunc."); + case LibFunc::stpncpy_chk: + case LibFunc::strncpy_chk: + --NumParams; // fallthrough + case LibFunc::stpncpy: + case LibFunc::strncpy: { + if (NumParams != 3 || FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != PCharTy || !FT->getParamType(2)->isIntegerTy()) + return false; + break; + } + case LibFunc::strcpy_chk: + case LibFunc::stpcpy_chk: + --NumParams; // fallthrough + case LibFunc::stpcpy: + case LibFunc::strcpy: { + if (NumParams != 2 || FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != PCharTy) + return false; + break; + } + case LibFunc::memmove_chk: + case LibFunc::memcpy_chk: + --NumParams; // fallthrough + case LibFunc::memmove: + case LibFunc::memcpy: { + if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != SizeTTy) + return false; + break; + } + case LibFunc::memset_chk: + --NumParams; // fallthrough + case LibFunc::memset: { + if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != SizeTTy) + return false; + break; + } + } + // If this is a fortified libcall, the last parameter is a size_t. + if (NumParams == FT->getNumParams() - 1) + return FT->getParamType(FT->getNumParams() - 1) == SizeTTy; + return true; } //===----------------------------------------------------------------------===// @@ -600,11 +461,8 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Verify the "strcpy" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) + + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -631,9 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) + + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL)) return nullptr; // These optimizations require DataLayout. 
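Aside: the "--NumParams; // fallthrough" idiom in checkStringCopyLibFuncSignature
above lets each fortified _chk case strip its trailing object-size parameter and
then share the plain variant's checks. A minimal standalone sketch of the same
idiom, using a hypothetical enum rather than the LLVM LibFunc definitions:

    #include <cassert>

    // Hypothetical stand-ins for LibFunc::Func values.
    enum class Func { Memcpy, MemcpyChk };

    // Reuses the plain memcpy arity check for the fortified form by
    // dropping its extra size_t parameter before falling through.
    static bool hasMemcpyArity(Func F, unsigned NumParams) {
      switch (F) {
      case Func::MemcpyChk:
        --NumParams; // fallthrough: a _chk call carries one extra parameter
      case Func::Memcpy:
        return NumParams == 3; // (dst, src, n)
      }
      return false;
    }

    int main() {
      assert(hasMemcpyArity(Func::Memcpy, 3));
      assert(hasMemcpyArity(Func::MemcpyChk, 4));
      assert(!hasMemcpyArity(Func::MemcpyChk, 3));
    }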
@@ -665,10 +522,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
 Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
   FunctionType *FT = Callee->getFunctionType();
-  if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
-      FT->getParamType(0) != FT->getParamType(1) ||
-      FT->getParamType(0) != B.getInt8PtrTy() ||
-      !FT->getParamType(2)->isIntegerTy()
+
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL))
     return nullptr;

   Value *Dst = CI->getArgOperand(0);
@@ -976,11 +831,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
   if (!DL)
     return nullptr;

-  FunctionType *FT = Callee->getFunctionType();
-  if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
-      !FT->getParamType(0)->isPointerTy() ||
-      !FT->getParamType(1)->isPointerTy() ||
-      FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL))
     return nullptr;

   // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -995,11 +846,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
   if (!DL)
     return nullptr;

-  FunctionType *FT = Callee->getFunctionType();
-  if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
-      !FT->getParamType(0)->isPointerTy() ||
-      !FT->getParamType(1)->isPointerTy() ||
-      FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL))
     return nullptr;

   // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -1014,11 +861,7 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
   if (!DL)
     return nullptr;

-  FunctionType *FT = Callee->getFunctionType();
-  if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
-      !FT->getParamType(0)->isPointerTy() ||
-      !FT->getParamType(1)->isIntegerTy() ||
-      FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL))
     return nullptr;

   // memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1031,6 +874,28 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
 // Math Library Optimizations
 //===----------------------------------------------------------------------===//

+/// Return a variant of Val with float type.
+/// Currently this works in two cases: If Val is an FPExtension of a float
+/// value to something bigger, simply return the operand.
+/// If Val is a ConstantFP but can be converted to a float ConstantFP without
+/// loss of precision, do so.
+static Value *valueHasFloatPrecision(Value *Val) {
+  if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
+    Value *Op = Cast->getOperand(0);
+    if (Op->getType()->isFloatTy())
+      return Op;
+  }
+  if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
+    APFloat F = Const->getValueAPF();
+    bool losesInfo;
+    (void)F.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+                    &losesInfo);
+    if (!losesInfo)
+      return ConstantFP::get(Const->getContext(), F);
+  }
+  return nullptr;
+}
+
 //===----------------------------------------------------------------------===//
 // Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1052,12 +917,11 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
   }

   // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy()) + Value *V = valueHasFloatPrecision(CI->getArgOperand(0)); + if (V == nullptr) return nullptr; // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); if (Callee->isIntrinsic()) { Module *M = CI->getParent()->getParent()->getParent(); Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID(); @@ -1083,21 +947,19 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { return nullptr; // If this is something like 'fmin((double)floatval1, (double)floatval2)', - // we convert it to fminf. - FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1)); - if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || !Cast2 || - !Cast2->getOperand(0)->getType()->isFloatTy()) + // or fmin(1.0, (double)floatval), then we convert it to fminf. + Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0)); + if (V1 == nullptr) + return nullptr; + Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1)); + if (V2 == nullptr) return nullptr; // fmin((double)floatval1, (double)floatval2) - // -> (double)fmin(floatval1, floatval2) - Value *V = nullptr; - Value *V1 = Cast1->getOperand(0); - Value *V2 = Cast2->getOperand(0); + // -> (double)fminf(floatval1, floatval2) // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP(). - V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, - Callee->getAttributes()); + Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Callee->getAttributes()); return B.CreateFPExt(V, B.getDoubleTy()); } @@ -1995,53 +1857,18 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { return false; } -Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) - return nullptr; - +Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, + IRBuilder<> &Builder) { LibFunc::Func Func; Function *Callee = CI->getCalledFunction(); StringRef FuncName = Callee->getName(); - IRBuilder<> Builder(CI); - bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C; - - // Command-line parameter overrides function attribute. - if (EnableUnsafeFPShrink.getNumOccurrences() > 0) - UnsafeFPShrink = EnableUnsafeFPShrink; - else if (Callee->hasFnAttribute("unsafe-fp-math")) { - // FIXME: This is the same problem as described in optimizeSqrt(). - // If calls gain access to IR-level FMF, then use that instead of a - // function attribute. - // Check for unsafe-fp-math = true. - Attribute Attr = Callee->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() == "true") - UnsafeFPShrink = true; - } - - // First, check for intrinsics. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { - if (!isCallingConvC) - return nullptr; - switch (II->getIntrinsicID()) { - case Intrinsic::pow: - return optimizePow(CI, Builder); - case Intrinsic::exp2: - return optimizeExp2(CI, Builder); - case Intrinsic::fabs: - return optimizeFabs(CI, Builder); - case Intrinsic::sqrt: - return optimizeSqrt(CI, Builder); - default: - return nullptr; - } - } - - // Then check for known library functions. + // Check for string/memory library functions. if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { - // We never change the calling convention. - if (!ignoreCallingConv(Func) && !isCallingConvC) - return nullptr; + // Make sure we never change the calling convention. 
+ assert((ignoreCallingConv(Func) || + CI->getCallingConv() == llvm::CallingConv::C) && + "Optimizing string/memory libcall would change the calling convention"); switch (Func) { case LibFunc::strcat: return optimizeStrCat(CI, Builder); @@ -2087,6 +1914,77 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeMemMove(CI, Builder); case LibFunc::memset: return optimizeMemSet(CI, Builder); + default: + break; + } + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + if (CI->isNoBuiltin()) + return nullptr; + + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + IRBuilder<> Builder(CI); + bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C; + + // Command-line parameter overrides function attribute. + if (EnableUnsafeFPShrink.getNumOccurrences() > 0) + UnsafeFPShrink = EnableUnsafeFPShrink; + else if (Callee->hasFnAttribute("unsafe-fp-math")) { + // FIXME: This is the same problem as described in optimizeSqrt(). + // If calls gain access to IR-level FMF, then use that instead of a + // function attribute. + + // Check for unsafe-fp-math = true. + Attribute Attr = Callee->getFnAttribute("unsafe-fp-math"); + if (Attr.getValueAsString() == "true") + UnsafeFPShrink = true; + } + + // First, check for intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + if (!isCallingConvC) + return nullptr; + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return optimizePow(CI, Builder); + case Intrinsic::exp2: + return optimizeExp2(CI, Builder); + case Intrinsic::fabs: + return optimizeFabs(CI, Builder); + case Intrinsic::sqrt: + return optimizeSqrt(CI, Builder); + default: + return nullptr; + } + } + + // Also try to simplify calls to fortified library functions. + if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { + // Try to further simplify the result. + CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + // If we were able to further simplify, remove the now redundant call. + SimplifiedCI->replaceAllUsesWith(V); + SimplifiedCI->eraseFromParent(); + return V; + } + return SimplifiedFortifiedCI; + } + + // Then check for known library functions. + if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + // We never change the calling convention. 
+ if (!ignoreCallingConv(Func) && !isCallingConvC) + return nullptr; + if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) + return V; + switch (Func) { case LibFunc::cosf: case LibFunc::cos: case LibFunc::cosl: @@ -2177,40 +2075,32 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (UnsafeFPShrink && hasFloatVersion(FuncName)) return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; + case LibFunc::copysign: case LibFunc::fmin: case LibFunc::fmax: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; - case LibFunc::memcpy_chk: - return optimizeMemCpyChk(CI, Builder); - case LibFunc::memmove_chk: - return optimizeMemMoveChk(CI, Builder); - case LibFunc::memset_chk: - return optimizeMemSetChk(CI, Builder); - case LibFunc::strcpy_chk: - return optimizeStrCpyChk(CI, Builder); - case LibFunc::stpcpy_chk: - return optimizeStpCpyChk(CI, Builder); - case LibFunc::stpncpy_chk: - case LibFunc::strncpy_chk: - return optimizeStrNCpyChk(CI, Builder); default: return nullptr; } } - return nullptr; } -LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, - const TargetLibraryInfo *TLI) : - DL(DL), - TLI(TLI), - UnsafeFPShrink(false) { +LibCallSimplifier::LibCallSimplifier( + const DataLayout *DL, const TargetLibraryInfo *TLI, + function_ref<void(Instruction *, Value *)> Replacer) + : FortifiedSimplifier(DL, TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), + Replacer(Replacer) {} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { + // Indirect through the replacer used in this instance. + Replacer(I, With); } -void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { +/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I, + Value *With) { I->replaceAllUsesWith(With); I->eraseFromParent(); } @@ -2262,3 +2152,184 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { // * trunc(cnst) -> cnst' // // + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, + unsigned ObjSizeOp, + unsigned SizeOp, + bool isString) { + if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp)) + return true; + if (ConstantInt *ObjSizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { + if (ObjSizeCI->isAllOnesValue()) + return true; + // If the object size wasn't -1 (unknown), bail out if we were asked to. + if (OnlyLowerUnknownSize) + return false; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. 
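+      // (GetStringLength returns 0 when it cannot compute the length.)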
+      if (Len == 0)
+        return false;
+      return ObjSizeCI->getZExtValue() >= Len;
+    }
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
+      return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+  }
+  return false;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+  Function *Callee = CI->getCalledFunction();
+
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL))
+    return nullptr;
+
+  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                   CI->getArgOperand(2), 1);
+    return CI->getArgOperand(0);
+  }
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+  Function *Callee = CI->getCalledFunction();
+
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL))
+    return nullptr;
+
+  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+    B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                    CI->getArgOperand(2), 1);
+    return CI->getArgOperand(0);
+  }
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+  Function *Callee = CI->getCalledFunction();
+
+  if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL))
+    return nullptr;
+
+  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+    return CI->getArgOperand(0);
+  }
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B,
+                                                      LibFunc::Func Func) {
+  Function *Callee = CI->getCalledFunction();
+  StringRef Name = Callee->getName();
+
+  if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+    return nullptr;
+
+  Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
+        *ObjSize = CI->getArgOperand(2);
+
+  // __stpcpy_chk(x,x,...)  -> x+strlen(x)
+  if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+    Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+  }
+
+  // If a) we don't have any length information, or b) we know this will
+  // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+  // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+  // TODO: It might be nice to get a maximum length out of the possible
+  // string lengths for varying.
+  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+    Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+    return Ret;
+  } else if (!OnlyLowerUnknownSize) {
+    // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0)
+      return nullptr;
+
+    // This optimization requires DataLayout.
+    if (!DL)
+      return nullptr;
+
+    Type *SizeTTy = DL->getIntPtrType(CI->getContext());
+    Value *LenV = ConstantInt::get(SizeTTy, Len);
+    Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+    // If the function was an __stpcpy_chk, and we were able to fold it into
+    // a __memcpy_chk, we still need to return the correct end pointer.
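+    // (stpcpy returns a pointer to the terminating NUL; GetStringLength
+    // counts that NUL, so the end pointer is Dst + Len - 1.)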
+    if (Ret && Func == LibFunc::stpcpy_chk)
+      return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
+    return Ret;
+  }
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
+                                                       IRBuilder<> &B,
+                                                       LibFunc::Func Func) {
+  Function *Callee = CI->getCalledFunction();
+  StringRef Name = Callee->getName();
+
+  if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+    return nullptr;
+  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+    Value *Ret =
+        EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                    CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+    return Ret;
+  }
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
+  if (CI->isNoBuiltin())
+    return nullptr;
+
+  LibFunc::Func Func;
+  Function *Callee = CI->getCalledFunction();
+  StringRef FuncName = Callee->getName();
+  IRBuilder<> Builder(CI);
+  bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+
+  // First, check that this is a known library function.
+  if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+    return nullptr;
+
+  // We never change the calling convention.
+  if (!ignoreCallingConv(Func) && !isCallingConvC)
+    return nullptr;
+
+  switch (Func) {
+  case LibFunc::memcpy_chk:
+    return optimizeMemCpyChk(CI, Builder);
+  case LibFunc::memmove_chk:
+    return optimizeMemMoveChk(CI, Builder);
+  case LibFunc::memset_chk:
+    return optimizeMemSetChk(CI, Builder);
+  case LibFunc::stpcpy_chk:
+  case LibFunc::strcpy_chk:
+    return optimizeStrpCpyChk(CI, Builder, Func);
+  case LibFunc::stpncpy_chk:
+  case LibFunc::strncpy_chk:
+    return optimizeStrpNCpyChk(CI, Builder, Func);
+  default:
+    break;
+  }
+  return nullptr;
+}
+
+FortifiedLibCallSimplifier::
+FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
+                           bool OnlyLowerUnknownSize)
+    : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
+}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index aacc945..b343cc4 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,7 +60,7 @@
 #define DEBUG_TYPE "symbol-rewriter"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
 namespace llvm {
 namespace SymbolRewriter {
+void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
+                   const std::string &Target) {
+  if (Comdat *CD = GO->getComdat()) {
+    auto &Comdats = M.getComdatSymbolTable();
+
+    Comdat *C = M.getOrInsertComdat(Target);
+    C->setSelectionKind(CD->getSelectionKind());
+    GO->setComdat(C);
+
+    Comdats.erase(Comdats.find(Source));
+  }
+}
+
 template <RewriteDescriptor::Type DT, typename ValueType,
           ValueType *(llvm::Module::*Get)(StringRef) const>
 class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType,
 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   bool Changed = false;
   if (ValueType *S = (M.*Get)(Source)) {
+    if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
+      rewriteComdat(M, GO, Source, Target);
+
     if (Value *T = (M.*Get)(Target))
       S->setValueName(T->getValueName());
     else
       S->setName(Target);
+
     Changed = true;
   }
   return Changed;
@@ -113,7 +130,8 @@ bool
ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { template <RewriteDescriptor::Type DT, typename ValueType, ValueType *(llvm::Module::*Get)(StringRef) const, - iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()> + iterator_range<typename iplist<ValueType>::iterator> + (llvm::Module::*Iterator)()> class PatternRewriteDescriptor : public RewriteDescriptor { public: const std::string Pattern; @@ -131,7 +149,8 @@ public: template <RewriteDescriptor::Type DT, typename ValueType, ValueType *(llvm::Module::*Get)(StringRef) const, - iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()> + iterator_range<typename iplist<ValueType>::iterator> + (llvm::Module::*Iterator)()> bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>:: performOnModule(Module &M) { bool Changed = false; @@ -143,6 +162,12 @@ performOnModule(Module &M) { report_fatal_error("unable to transforn " + C.getName() + " in " + M.getModuleIdentifier() + ": " + Error); + if (C.getName() == Name) + continue; + + if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) + rewriteComdat(M, GO, C.getName(), Name); + if (Value *V = (M.*Get)(Name)) C.setValueName(V->getValueName()); else @@ -492,7 +517,7 @@ RewriteSymbols::RewriteSymbols() : ModulePass(ID) { RewriteSymbols::RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL) : ModulePass(ID) { - std::swap(Descriptors, DL); + Descriptors.splice(Descriptors.begin(), DL); } bool RewriteSymbols::runOnModule(Module &M) { diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 0c2fc0a..7e00a80 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); // This is a cluster of orthogonal Transforms - AU.addPreserved("mem2reg"); AU.addPreservedID(LowerSwitchID); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a2f69d1..49c0902 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -40,7 +40,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Global values do not need to be seeded into the VM if they // are using the identity mapping. - if (isa<GlobalValue>(V) || isa<MDString>(V)) + if (isa<GlobalValue>(V)) return VM[V] = const_cast<Value*>(V); if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { @@ -56,57 +56,24 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = const_cast<Value*>(V); } - - if (const MDNode *MD = dyn_cast<MDNode>(V)) { + if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) { + const Metadata *MD = MDV->getMetadata(); // If this is a module-level metadata and we know that nothing at the module // level is changing, then use an identity mapping. - if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges)) - return VM[V] = const_cast<Value*>(V); - - // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); - VM[V] = Dummy; - - // Check all operands to see if any need to be remapped. 
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { - Value *OP = MD->getOperand(i); - if (!OP) continue; - Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); - // Use identity map if Mapped_Op is null and we can ignore missing - // entries. - if (Mapped_OP == OP || - (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries))) - continue; - - // Ok, at least one operand needs remapping. - SmallVector<Value*, 4> Elts; - Elts.reserve(MD->getNumOperands()); - for (i = 0; i != e; ++i) { - Value *Op = MD->getOperand(i); - if (!Op) - Elts.push_back(nullptr); - else { - Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); - // Use identity map if Mapped_Op is null and we can ignore missing - // entries. - if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries)) - Mapped_Op = Op; - Elts.push_back(Mapped_Op); - } - } - MDNode *NewMD = MDNode::get(V->getContext(), Elts); - Dummy->replaceAllUsesWith(NewMD); - VM[V] = NewMD; - MDNode::deleteTemporary(Dummy); - return NewMD; - } + if (!isa<LocalAsMetadata>(MD) && (Flags & RF_NoModuleLevelChanges)) + return VM[V] = const_cast<Value *>(V); - VM[V] = const_cast<Value*>(V); - MDNode::deleteTemporary(Dummy); + auto *MappedMD = MapMetadata(MD, VM, Flags, TypeMapper, Materializer); + if (MD == MappedMD || (!MappedMD && (Flags & RF_IgnoreMissingEntries))) + return VM[V] = const_cast<Value *>(V); - // No operands needed remapping. Use an identity mapping. - return const_cast<Value*>(V); + // FIXME: This assert crashes during bootstrap, but I think it should be + // correct. For now, just match behaviour from before the metadata/value + // split. + // + // assert(MappedMD && "Referenced metadata value not in value map"); + return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD); } // Okay, this either must be a constant (which may or may not be mappable) or @@ -177,6 +144,198 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); } +static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key, + Metadata *Val) { + VM.MD()[Key].reset(Val); + return Val; +} + +static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) { + return mapToMetadata(VM, MD, const_cast<Metadata *>(MD)); +} + +static Metadata *MapMetadataImpl(const Metadata *MD, + SmallVectorImpl<MDNode *> &Cycles, + ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer); + +static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles, + ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + if (!Op) + return nullptr; + if (Metadata *MappedOp = + MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer)) + return MappedOp; + // Use identity map if MappedOp is null and we can ignore missing entries. + if (Flags & RF_IgnoreMissingEntries) + return Op; + + // FIXME: This assert crashes during bootstrap, but I think it should be + // correct. For now, just match behaviour from before the metadata/value + // split. + // + // llvm_unreachable("Referenced metadata not in value map!"); + return nullptr; +} + +/// \brief Remap nodes. +/// +/// Insert \c NewNode in the value map, and then remap \c OldNode's operands. +/// Assumes that \c NewNode is already a clone of \c OldNode. +/// +/// \pre \c NewNode is a clone of \c OldNode. 
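+///
+/// Mapping \c OldNode to \c NewNode before visiting the operands means a
+/// cyclic reference back to \c OldNode is rewritten to \c NewNode instead
+/// of recursing forever.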
+static bool remap(const MDNode *OldNode, MDNode *NewNode,
+                  SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
+                  RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+                  ValueMaterializer *Materializer) {
+  assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
+         "Expected nodes to match");
+  assert(OldNode->isResolved() && "Expected resolved node");
+  assert(!NewNode->isUniqued() && "Expected non-uniqued node");
+
+  // Map the node upfront so it's available for cyclic references.
+  mapToMetadata(VM, OldNode, NewNode);
+  bool AnyChanged = false;
+  for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
+    Metadata *Old = OldNode->getOperand(I);
+    assert(NewNode->getOperand(I) == Old &&
+           "Expected old operands to already be in place");
+
+    Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
+                                  TypeMapper, Materializer);
+    if (Old != New) {
+      AnyChanged = true;
+      NewNode->replaceOperandWith(I, New);
+    }
+  }
+
+  return AnyChanged;
+}
+
+/// \brief Map a distinct MDNode.
+///
+/// Distinct nodes are not uniqued, so they must always be recreated.
+static Metadata *mapDistinctNode(const MDNode *Node,
+                                 SmallVectorImpl<MDNode *> &Cycles,
+                                 ValueToValueMapTy &VM, RemapFlags Flags,
+                                 ValueMapTypeRemapper *TypeMapper,
+                                 ValueMaterializer *Materializer) {
+  assert(Node->isDistinct() && "Expected distinct node");
+
+  MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
+  remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
+
+  // Track any cycles beneath this node.
+  for (Metadata *Op : NewMD->operands())
+    if (auto *Node = dyn_cast_or_null<MDNode>(Op))
+      if (!Node->isResolved())
+        Cycles.push_back(Node);
+
+  return NewMD;
+}
+
+/// \brief Map a uniqued MDNode.
+///
+/// Uniqued nodes may not need to be recreated (they may map to themselves).
+static Metadata *mapUniquedNode(const MDNode *Node,
+                                SmallVectorImpl<MDNode *> &Cycles,
+                                ValueToValueMapTy &VM, RemapFlags Flags,
+                                ValueMapTypeRemapper *TypeMapper,
+                                ValueMaterializer *Materializer) {
+  assert(Node->isUniqued() && "Expected uniqued node");
+
+  // Create a temporary node upfront in case we have a metadata cycle.
+  auto ClonedMD = Node->clone();
+  if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
+    // No operands changed, so use the identity mapping.
+    return mapToSelf(VM, Node);
+
+  // At least one operand has changed, so uniquify the cloned node.
+  return mapToMetadata(VM, Node,
+                       MDNode::replaceWithUniqued(std::move(ClonedMD)));
+}
+
+static Metadata *MapMetadataImpl(const Metadata *MD,
+                                 SmallVectorImpl<MDNode *> &Cycles,
+                                 ValueToValueMapTy &VM, RemapFlags Flags,
+                                 ValueMapTypeRemapper *TypeMapper,
+                                 ValueMaterializer *Materializer) {
+  // If the value already exists in the map, use it.
+  if (Metadata *NewMD = VM.MD().lookup(MD).get())
+    return NewMD;
+
+  if (isa<MDString>(MD))
+    return mapToSelf(VM, MD);
+
+  if (isa<ConstantAsMetadata>(MD))
+    if ((Flags & RF_NoModuleLevelChanges))
+      return mapToSelf(VM, MD);
+
+  if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) {
+    Value *MappedV =
+        MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer);
+    if (VMD->getValue() == MappedV ||
+        (!MappedV && (Flags & RF_IgnoreMissingEntries)))
+      return mapToSelf(VM, MD);
+
+    // FIXME: This assert crashes during bootstrap, but I think it should be
+    // correct. For now, just match behaviour from before the metadata/value
+    // split.
+    //
+    // assert(MappedV && "Referenced metadata not in value map!");
+    if (MappedV)
+      return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV));
+    return nullptr;
+  }
+
+  const MDNode *Node = cast<MDNode>(MD);
+  assert(Node->isResolved() && "Unexpected unresolved node");
+
+  // If this is a module-level metadata and we know that nothing at the
+  // module level is changing, then use an identity mapping.
+  if (Flags & RF_NoModuleLevelChanges)
+    return mapToSelf(VM, MD);
+
+  if (Node->isDistinct())
+    return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+
+  return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+}
+
+Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
+                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+                            ValueMaterializer *Materializer) {
+  SmallVector<MDNode *, 8> Cycles;
+  Metadata *NewMD =
+      MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
+
+  // Resolve cycles underneath MD.
+  if (NewMD && NewMD != MD) {
+    if (auto *N = dyn_cast<MDNode>(NewMD))
+      if (!N->isResolved())
+        N->resolveCycles();
+
+    for (MDNode *N : Cycles)
+      if (!N->isResolved())
+        N->resolveCycles();
+  } else {
+    // Shouldn't get unresolved cycles if nothing was remapped.
+    assert(Cycles.empty() && "Expected no unresolved cycles");
+  }
+
+  return NewMD;
+}
+
+MDNode *llvm::MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
+                          RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+                          ValueMaterializer *Materializer) {
+  return cast<MDNode>(MapMetadata(static_cast<const Metadata *>(MD), VM, Flags,
+                                  TypeMapper, Materializer));
+}
+
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 ///
@@ -215,7 +374,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
          ME = MDs.end(); MI != ME; ++MI) {
     MDNode *Old = MI->second;
-    MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
+    MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
     if (New != Old)
       I->setMetadata(MI->first, New);
   }
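Aside: LibCallSimplifier now routes instruction replacement through a
caller-supplied Replacer callback, with replaceAllUsesWithDefault (replace the
uses, then erase the instruction) as the default policy. A minimal standalone
sketch of that injection pattern in plain C++ -- the types are hypothetical
stand-ins, and std::function is used instead of LLVM's function_ref to keep the
example self-contained:

    #include <functional>
    #include <iostream>
    #include <utility>

    // Hypothetical stand-in for llvm::Instruction.
    struct Instr { const char *Name; };

    // Default policy: replace uses and dispose of the old instruction now.
    static void replaceAllUsesWithDefault(Instr *I, Instr *With) {
      std::cout << "replace " << I->Name << " with " << With->Name
                << " and erase it\n";
    }

    class Simplifier {
      std::function<void(Instr *, Instr *)> Replacer;

    public:
      explicit Simplifier(std::function<void(Instr *, Instr *)> R =
                              replaceAllUsesWithDefault)
          : Replacer(std::move(R)) {}

      // Indirect through the replacer used in this instance.
      void replaceAllUsesWith(Instr *I, Instr *With) { Replacer(I, With); }
    };

    int main() {
      Instr Old{"call strcpy"}, New{"call llvm.memcpy"};

      Simplifier Default; // immediate-erase policy
      Default.replaceAllUsesWith(&Old, &New);

      // A client that must defer deletion (e.g. while iterating over the
      // instruction list) injects its own policy.
      Simplifier Deferred([](Instr *I, Instr *) {
        std::cout << "queue " << I->Name << " for deferred deletion\n";
      });
      Deferred.replaceAllUsesWith(&Old, &New);
    }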