aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--lib/Transforms/Utils/Android.mk1
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp166
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp34
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt2
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp5
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp4
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp1
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp488
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp67
-rw-r--r--lib/Transforms/Utils/Local.cpp313
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp23
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp20
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp7
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp2
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp18
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp1276
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp44
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp276
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp73
-rw-r--r--lib/Transforms/Utils/SpecialCaseList.cpp225
-rw-r--r--lib/Transforms/Utils/Utils.cpp1
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp31
23 files changed, 2045 insertions, 1034 deletions
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index df87208..9bf9ef3 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -29,6 +29,7 @@ transforms_utils_SRC_FILES := \
SimplifyIndVar.cpp \
SimplifyInstructions.cpp \
SimplifyLibCalls.cpp \
+ SpecialCaseList.cpp \
UnifyFunctionExitNodes.cpp \
Utils.cpp \
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index ba99d2e..e17a416 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -170,7 +171,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (DomTreeNode *DTN = DT->getNode(BB)) {
DomTreeNode *PredDTN = DT->getNode(PredBB);
SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
DT->changeImmediateDominator(*DI, PredDTN);
@@ -235,22 +236,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
}
-/// GetSuccessorNumber - Search for the specified successor of basic block BB
-/// and return its position in the terminator instruction's list of
-/// successors. It is an error to call this with a block that is not a
-/// successor.
-unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
- TerminatorInst *Term = BB->getTerminator();
-#ifndef NDEBUG
- unsigned e = Term->getNumSuccessors();
-#endif
- for (unsigned i = 0; ; ++i) {
- assert(i != e && "Didn't find edge?");
- if (Term->getSuccessor(i) == Succ)
- return i;
- }
-}
-
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
@@ -598,52 +583,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
}
-/// FindFunctionBackedges - Analyze the specified function to find all of the
-/// loop backedges in the function and return them. This is a relatively cheap
-/// (compared to computing dominators and loop info) analysis.
-///
-/// The output is added to Result, as pairs of <from,to> edge info.
-void llvm::FindFunctionBackedges(const Function &F,
- SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
- const BasicBlock *BB = &F.getEntryBlock();
- if (succ_begin(BB) == succ_end(BB))
- return;
-
- SmallPtrSet<const BasicBlock*, 8> Visited;
- SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
- SmallPtrSet<const BasicBlock*, 8> InStack;
-
- Visited.insert(BB);
- VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
- InStack.insert(BB);
- do {
- std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
- const BasicBlock *ParentBB = Top.first;
- succ_const_iterator &I = Top.second;
-
- bool FoundNew = false;
- while (I != succ_end(ParentBB)) {
- BB = *I++;
- if (Visited.insert(BB)) {
- FoundNew = true;
- break;
- }
- // Successor is in VisitStack, it's a back edge.
- if (InStack.count(BB))
- Result.push_back(std::make_pair(ParentBB, BB));
- }
-
- if (FoundNew) {
- // Go down one level if there is a unvisited successor.
- InStack.insert(BB);
- VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
- } else {
- // Go up one level.
- InStack.erase(VisitStack.pop_back_val().first);
- }
- } while (!VisitStack.empty());
-}
-
/// FoldReturnIntoUncondBranch - This method duplicates the specified return
/// instruction into a predecessor which ends in an unconditional branch. If
/// the return instruction returns a value defined by a PHI, propagate the
@@ -726,3 +665,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
return CheckTerm;
}
+
+/// GetIfCondition - Check whether the two-predecessor block \p BB is the merge
+/// point of an "if" construct. On success, return the boolean condition that
+/// selects which edge into BB is taken, and set \p IfTrue / \p IfFalse to the
+/// blocks BB is entered from when that condition is true / false. Return null
+/// (leaving IfTrue and IfFalse unmodified) when the pattern is not matched.
+///
+/// No attempt is made to check whether the true/false blocks contain large or
+/// otherwise unsavory instructions.
+Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+                            BasicBlock *&IfFalse) {
+  BasicBlock *Pred1 = NULL;
+  BasicBlock *Pred2 = NULL;
+
+  if (PHINode *FirstPHI = dyn_cast<PHINode>(BB->begin())) {
+    // Read the two predecessors off the leading PHI node.
+    if (FirstPHI->getNumIncomingValues() != 2)
+      return NULL;
+    Pred1 = FirstPHI->getIncomingBlock(0);
+    Pred2 = FirstPHI->getIncomingBlock(1);
+  } else {
+    // No PHI: walk the predecessor list, which must hold exactly two entries.
+    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+    if (PI == PE) // No predecessor
+      return NULL;
+    Pred1 = *PI;
+    ++PI;
+    if (PI == PE) // Only one predecessor
+      return NULL;
+    Pred2 = *PI;
+    ++PI;
+    if (PI != PE) // More than two predecessors
+      return NULL;
+  }
+
+  // Only branch terminators are understood here; other control flow will be
+  // lowered to branches if possible anyway.
+  BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+  BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+  if (!Pred1Br || !Pred2Br)
+    return NULL;
+
+  // Normalize so that, if exactly one predecessor ends in a conditional
+  // branch, it is Pred1. This avoids duplicating the code below.
+  if (Pred2Br->isConditional()) {
+    // Two conditional predecessors do not form an "if statement". The case
+    // could be transformed, but the condition would still be required, so
+    // there is no chance of eliminating it and the xform is probably not
+    // profitable.
+    if (Pred1Br->isConditional())
+      return NULL;
+
+    std::swap(Pred1, Pred2);
+    std::swap(Pred1Br, Pred2Br);
+  }
+
+  if (Pred1Br->isConditional()) {
+    // Pred2 must not have incoming edges from other blocks; otherwise the
+    // condition does not dominate BB.
+    if (!Pred2->getSinglePredecessor())
+      return NULL;
+
+    // The conditional branch has to target exactly BB and Pred2. Anything
+    // else is not an "if statement".
+    BasicBlock *TrueDest = Pred1Br->getSuccessor(0);
+    BasicBlock *FalseDest = Pred1Br->getSuccessor(1);
+    if (TrueDest == BB && FalseDest == Pred2) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else if (TrueDest == Pred2 && FalseDest == BB) {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    } else {
+      // One arm of the conditional goes to BB, so the other must go somewhere
+      // unrelated, and this is not an "if statement".
+      return NULL;
+    }
+
+    return Pred1Br->getCondition();
+  }
+
+  // Both predecessors end with an unconditional branch to BB. That is still
+  // fine provided both have the same single predecessor and THAT block ends
+  // in a conditional branch.
+  BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+  if (!CommonPred || CommonPred != Pred2->getSinglePredecessor())
+    return NULL;
+
+  BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+  if (!BI)
+    return NULL;
+
+  assert(BI->isConditional() && "Two successors but not conditional?");
+  const bool Pred1OnTruePath = (BI->getSuccessor(0) == Pred1);
+  IfTrue = Pred1OnTruePath ? Pred1 : Pred2;
+  IfFalse = Pred1OnTruePath ? Pred2 : Pred1;
+  return BI->getCondition();
+}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8513772..8f3ff96 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileInfo.h"
@@ -84,39 +85,6 @@ bool BreakCriticalEdges::runOnFunction(Function &F) {
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
-// isCriticalEdge - Return true if the specified edge is a critical edge.
-// Critical edges are edges from a block with multiple successors to a block
-// with multiple predecessors.
-//
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
- bool AllowIdenticalEdges) {
- assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
- if (TI->getNumSuccessors() == 1) return false;
-
- const BasicBlock *Dest = TI->getSuccessor(SuccNum);
- const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
-
- // If there is more than one predecessor, this is a critical edge...
- assert(I != E && "No preds, but we have an edge to the block?");
- const BasicBlock *FirstPred = *I;
- ++I; // Skip one edge due to the incoming arc from TI.
- if (!AllowIdenticalEdges)
- return I != E;
-
- // If AllowIdenticalEdges is true, then we allow this edge to be considered
- // non-critical iff all preds come from TI's block.
- while (I != E) {
- const BasicBlock *P = *I;
- if (P != FirstPred)
- return true;
- // Note: leave this as is until no one ever compiles with either gcc 4.0.1
- // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
- E = pred_end(P);
- ++I;
- }
- return false;
-}
-
/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
/// may require new PHIs in the new exit block. This function inserts the
/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index b71628b..3648fd6 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -25,9 +25,11 @@ add_llvm_library(LLVMTransformUtils
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
+ FlattenCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
+ SpecialCaseList.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index be8d39e..d105f5e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
RemapInstruction(II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper);
+ TypeMapper, Materializer);
}
/// CloneFunction - Return a copy of the specified function, but without
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f7c659f..82013f9 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -277,8 +277,8 @@ void CodeExtractor::splitReturnBlocks() {
DomTreeNode *NewNode = DT->addNewBlock(New, *I);
- for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
+ for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
}
}
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index db525cd..0723b35 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -10,6 +10,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 0000000..9cbe15d
--- /dev/null
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,488 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flattencfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+namespace {
+/// \brief Implementation of the CFG flattening exposed through
+/// llvm::FlattenCFG: collapsing chains of conditional branches into a single
+/// branch on a parallel-and / parallel-or condition, and merging adjacent
+/// if-regions that contain identical instructions.
+class FlattenCFGOpt {
+  /// Alias analysis consulted by CompareIfRegionBlock. May be null, in which
+  /// case a memory write in a compared block is treated as conflicting with
+  /// any memory access in the second region's entry block.
+  AliasAnalysis *AA;
+
+  /// \brief Use parallel-and or parallel-or to generate conditions for
+  /// conditional branches.
+  bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+
+  /// \brief If \p BB is the merge block of an if-region, attempt to merge
+  /// the if-region with an adjacent if-region upstream if two if-regions
+  /// contain identical instructions.
+  bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+
+  /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+  /// are from two if-regions whose entry blocks are \p Head1 and \p
+  /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+  /// instructions, and have no memory reference alias with \p Head2.
+  /// This is used as a legality check for merging if-regions.
+  bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+                            BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+  /// \brief Create an optimizer that uses \p AA (possibly null) for the
+  /// aliasing checks. Explicit so an AliasAnalysis pointer never converts
+  /// implicitly into an optimizer object.
+  explicit FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+
+  /// \brief Try to flatten the CFG around \p BB. \returns true if the CFG was
+  /// changed.
+  bool run(BasicBlock *BB);
+};
+}
+
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of them contains conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %if.end
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where, BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains unconditional branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of conditional branches.
+/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// as its predecessors.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+                                         Pass *P) {
+  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+  if (PHI)
+    return false; // For simplicity, avoid cases containing PHI nodes.
+
+  BasicBlock *LastCondBlock = NULL;
+  BasicBlock *FirstCondBlock = NULL;
+  BasicBlock *UnCondBlock = NULL;
+  // Successor index of BB in its conditional predecessors (0 = true edge,
+  // 1 = false edge); -1 until the first conditional predecessor is seen.
+  int Idx = -1;
+
+  // Check predecessors of \p BB.
+  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+       PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+    // All predecessors should terminate with a branch.
+    if (!PBI)
+      return false;
+
+    BasicBlock *PP = Pred->getSinglePredecessor();
+
+    if (PBI->isUnconditional()) {
+      // Case 1: Pred (BB3) is an unconditional block, it should
+      // have a single predecessor (BB2) that is also a predecessor
+      // of \p BB (BB4) and should not have address-taken.
+      // There should exist only one such unconditional
+      // branch among the predecessors.
+      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+          Pred->hasAddressTaken())
+        return false;
+
+      UnCondBlock = Pred;
+      continue;
+    }
+
+    // Only conditional branches are allowed beyond this point.
+    assert(PBI->isConditional());
+
+    // Condition's unique use should be the branch instruction.
+    Value *PC = PBI->getCondition();
+    if (!PC || !PC->hasOneUse())
+      return false;
+
+    if (PP && Preds.count(PP)) {
+      // These are internal condition blocks to be merged from, e.g.,
+      // BB2 in both cases.
+      // Should not be address-taken.
+      if (Pred->hasAddressTaken())
+        return false;
+
+      // Instructions in the internal condition blocks should be safe
+      // to hoist up.
+      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
+        Instruction *CI = BI++;
+        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+          return false;
+      }
+    } else {
+      // This is the condition block to be merged into, e.g. BB1 in
+      // both cases. There may be only one such block.
+      if (FirstCondBlock)
+        return false;
+      FirstCondBlock = Pred;
+    }
+
+    // Find whether BB is uniformly on the true (or false) path
+    // for all of its predecessors.
+    BasicBlock *PS1 = PBI->getSuccessor(0);
+    BasicBlock *PS2 = PBI->getSuccessor(1);
+    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+    int CIdx = (PS1 == BB) ? 0 : 1;
+
+    if (Idx == -1)
+      Idx = CIdx;
+    else if (CIdx != Idx)
+      return false;
+
+    // PS is the successor which is not BB. Check successors to identify
+    // the last conditional branch.
+    if (Preds.count(PS) == 0) {
+      // Case 2.
+      LastCondBlock = Pred;
+    } else {
+      // Case 1
+      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+      if (BPS && BPS->isUnconditional()) {
+        // Case 1: PS(BB3) should be an unconditional branch.
+        LastCondBlock = Pred;
+      }
+    }
+  }
+
+  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+    return false;
+
+  TerminatorInst *TBB = LastCondBlock->getTerminator();
+  BasicBlock *PS1 = TBB->getSuccessor(0);
+  BasicBlock *PS2 = TBB->getSuccessor(1);
+  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
+  // attempt branch inversion.
+  if (!PBI1 || !PBI1->isUnconditional() ||
+      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+    // Check whether PS2 jumps into PS1.
+    if (!PBI2 || !PBI2->isUnconditional() ||
+        (PS2->getTerminator()->getSuccessor(0) != PS1))
+      return false;
+
+    // Do branch inversion, walking from the last condition block up to the
+    // first one through the single-predecessor chain.
+    BasicBlock *CurrBlock = LastCondBlock;
+    bool EverChanged = false;
+    while (1) {
+      // Every block on the chain was accepted above because it ends in a
+      // branch, so cast<> (which asserts) is the right tool here.
+      BranchInst *BI = cast<BranchInst>(CurrBlock->getTerminator());
+      // The branch condition is not guaranteed to be a compare (it may be,
+      // e.g., an 'and'/'or' or an argument); skip such blocks instead of
+      // dereferencing a null CmpInst.
+      if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+        CmpInst::Predicate Predicate = CI->getPredicate();
+        // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+        if ((Predicate == CmpInst::ICMP_NE) ||
+            (Predicate == CmpInst::FCMP_ONE)) {
+          CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+          BI->swapSuccessors();
+          EverChanged = true;
+        }
+      }
+      if (CurrBlock == FirstCondBlock)
+        break;
+      CurrBlock = CurrBlock->getSinglePredecessor();
+    }
+    return EverChanged;
+  }
+
+  // PS1 must end in an unconditional branch. (Already implied by the test
+  // above; kept as a cheap safeguard.)
+  if (!PBI1 || !PBI1->isUnconditional())
+    return false;
+
+  // PS2 should not contain PHI node.
+  PHI = dyn_cast<PHINode>(PS2->begin());
+  if (PHI)
+    return false;
+
+  // Do the transformation.
+  BasicBlock *CB;
+  // FirstCondBlock was only recorded for a predecessor ending in a
+  // conditional branch, so its terminator is known to be a BranchInst.
+  BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+  bool Iteration = true;
+  // Remember the caller's insertion point so it can be restored afterwards.
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  Value *PC = PBI->getCondition();
+
+  do {
+    // CB is the next condition block on the chain (the successor of the
+    // current branch that is not BB).
+    CB = PBI->getSuccessor(1 - Idx);
+    // Delete the conditional branch.
+    FirstCondBlock->getInstList().pop_back();
+    // Hoist all of CB's instructions, including its terminator, into
+    // FirstCondBlock.
+    FirstCondBlock->getInstList()
+        .splice(FirstCondBlock->end(), CB->getInstList());
+    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+    Value *CC = PBI->getCondition();
+    // Merge conditions.
+    Builder.SetInsertPoint(PBI);
+    Value *NC;
+    if (Idx == 0)
+      // Case 2, use parallel or.
+      NC = Builder.CreateOr(PC, CC);
+    else
+      // Case 1, use parallel and.
+      NC = Builder.CreateAnd(PC, CC);
+
+    PBI->replaceUsesOfWith(CC, NC);
+    PC = NC;
+    if (CB == LastCondBlock)
+      Iteration = false;
+    // Remove internal conditional branches.
+    CB->dropAllReferences();
+    // make CB unreachable and let downstream to delete the block.
+    new UnreachableInst(CB->getContext(), CB);
+  } while (Iteration);
+
+  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+  return true;
+}
+
+/// Legality check used when merging two if-regions. \p Head1 and \p Head2 are
+/// the entry blocks of the 1st and 2nd if-region; \p Block1 and \p Block2 are
+/// the blocks (one from each region) to compare. \returns true if \p Block1
+/// and \p Block2 have identical instructions and do not have memory reference
+/// alias with \p Head2.
+///
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+                                         BasicBlock *Block1,
+                                         BasicBlock *Block2) {
+  TerminatorInst *Head2Term = Head2->getTerminator();
+  Instruction *Head2First = Head2->begin();
+
+  // A path whose block is the region entry itself is an empty then-path or
+  // else-path; the two paths match only when both are empty.
+  const bool Empty1 = (Block1 == Head1);
+  const bool Empty2 = (Block2 == Head2);
+  if (Empty1 || Empty2)
+    return Empty1 == Empty2;
+
+  // Walk both blocks in lock step, excluding their terminators.
+  BasicBlock::iterator I1 = Block1->begin();
+  BasicBlock::iterator E1 = Block1->getTerminator();
+  BasicBlock::iterator I2 = Block2->begin();
+  BasicBlock::iterator E2 = Block2->getTerminator();
+
+  for (; I1 != E1; ++I1, ++I2) {
+    if (!I1->isIdenticalTo(I2))
+      return false;
+
+    // Illegal to remove instructions with side effects except
+    // non-volatile stores.
+    if (I1->mayHaveSideEffects()) {
+      StoreInst *Store = dyn_cast<StoreInst>(&*I1);
+      if (!Store || Store->isVolatile())
+        return false;
+    }
+
+    // For simplicity and speed, no data dependency check is attempted:
+    // any instruction that may read from memory is rejected outright.
+    if (I1->mayReadFromMemory())
+      return false;
+
+    if (!I1->mayWriteToMemory())
+      continue;
+
+    // The instruction writes memory: it must not alias with any memory
+    // access in Head2 (everything before Head2's terminator).
+    for (BasicBlock::iterator HI = Head2First, HE = Head2Term; HI != HE;
+         ++HI) {
+      if (!HI->mayReadFromMemory() && !HI->mayWriteToMemory())
+        continue;
+      // Without alias analysis, conservatively assume a conflict.
+      if (!AA || AA->alias(I1, HI))
+        return false;
+    }
+  }
+
+  // Block1 is exhausted; the blocks match only if Block2 is exhausted too.
+  return I2 == E2;
+}
+
+/// Check whether \p BB is the merge block of an if-region. If yes, check
+/// whether there exists an adjacent if-region upstream, the two if-regions
+/// contain identical instructions and can be legally merged. \returns true if
+/// the two if-regions are merged.
+///
+/// From:
+/// if (a)
+///   statement;
+/// if (b)
+///   statement;
+///
+/// To:
+/// if (a || b)
+///   statement;
+///
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
+                                  Pass *P) {
+  // Second (downstream) if-region: the one that merges at BB.
+  BasicBlock *IfTrue2, *IfFalse2;
+  Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+  Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+  if (!CInst2)
+    return false;
+
+  // The block defining the condition is taken as the region's entry block.
+  BasicBlock *SecondEntryBlock = CInst2->getParent();
+  if (SecondEntryBlock->hasAddressTaken())
+    return false;
+
+  // First (upstream) if-region: the one that merges at SecondEntryBlock.
+  BasicBlock *IfTrue1, *IfFalse1;
+  Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+  Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+  if (!CInst1)
+    return false;
+
+  BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+  // Either then-path or else-path should be empty.
+  if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+    return false;
+  if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+    return false;
+
+  TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+  Instruction *PBI2 = SecondEntryBlock->begin();
+
+  // Corresponding paths of the two regions must hold identical instructions.
+  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
+                            IfTrue2))
+    return false;
+
+  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
+                            IfFalse2))
+    return false;
+
+  // Check whether \p SecondEntryBlock has side-effect and is safe to
+  // speculate: after the merge its instructions run unconditionally in
+  // FirstEntryBlock.
+  for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+    Instruction *CI = BI;
+    if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+        !isSafeToSpeculativelyExecute(CI))
+      return false;
+  }
+
+  // Merge \p SecondEntryBlock into \p FirstEntryBlock: drop the first
+  // region's branch and append every instruction of SecondEntryBlock,
+  // including its terminator.
+  FirstEntryBlock->getInstList().pop_back();
+  FirstEntryBlock->getInstList()
+      .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+  // The spliced-in terminator is expected to be the second region's branch;
+  // it is dereferenced unconditionally below, so use cast<> (which asserts)
+  // rather than an unchecked dyn_cast<>.
+  BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
+  Value *CC = PBI->getCondition();
+  // Branch on (first condition | second condition), restoring the caller's
+  // insertion point afterwards.
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  Builder.SetInsertPoint(PBI);
+  Value *NC = Builder.CreateOr(CInst1, CC);
+  PBI->replaceUsesOfWith(CC, NC);
+  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+  // Remove IfTrue1
+  if (IfTrue1 != FirstEntryBlock) {
+    IfTrue1->dropAllReferences();
+    IfTrue1->eraseFromParent();
+  }
+
+  // Remove IfFalse1
+  if (IfFalse1 != FirstEntryBlock) {
+    IfFalse1->dropAllReferences();
+    IfFalse1->eraseFromParent();
+  }
+
+  // Remove \p SecondEntryBlock
+  SecondEntryBlock->dropAllReferences();
+  SecondEntryBlock->eraseFromParent();
+  DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+  return true;
+}
+
+/// Apply the flattening transformations to \p BB. \returns true as soon as
+/// one of them changes the CFG, false if none applies.
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+  IRBuilder<> Builder(BB);
+
+  // Short-circuit evaluation keeps the original ordering: MergeIfRegion is
+  // attempted only when FlattenParallelAndOr made no change.
+  return FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder);
+}
+
+/// FlattenCFG - Entry point for CFG flattening on \p BB. For example, it
+/// uses parallel-and and parallel-or mode to collapse if-conditions, and
+/// merges if-regions with identical statements. \p AA is handed to the
+/// optimizer for its aliasing checks. Returns true if the CFG was changed.
+///
+bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+  FlattenCFGOpt Flattener(AA);
+  return Flattener.run(BB);
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a..dabb67b 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,8 @@ namespace {
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
- void forwardResume(ResumeInst *RI);
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite. If the code doesn't have calls or unwinds, we know there is
- // nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls) {
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
- return;
- }
-
+ // rewrite.
InvokeInliningInfo Invoke(II);
-
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
continue;
}
+ // Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI);
+ Invoke.forwardResume(RI, InlinedLPads);
}
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
+ // invoke instruction. Eliminate these entries (which might even delete the
// PHI node) now.
InvokeDest->removePredecessor(II->getParent());
}
@@ -748,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If the call site was an invoke instruction, add a branch to the normal
// destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- BranchInst::Create(II->getNormalDest(), TheCall);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
// If the return instruction returned a value, replace uses of the call with
// uses of the returned value.
@@ -777,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = NULL;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
CalledFunc->getName()+".exit");
} else { // It's a call
@@ -840,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
ReturnInst *RI = Returns[i];
- BranchInst::Create(AfterCallBB, RI);
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
RI->eraseFromParent();
}
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
} else if (!Returns.empty()) {
// Otherwise, if there is exactly one return value, just replace anything
// using the return value of the call with the computed value.
@@ -864,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a54ee08..08e1808 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -84,7 +84,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BI->eraseFromParent();
return true;
}
-
+
if (Dest2 == Dest1) { // Conditional branch to same location?
// This branch matches something like this:
// br bool %cond, label %Dest, label %Dest
@@ -104,7 +104,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
return false;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
@@ -188,7 +188,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
-
+
if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
@@ -231,7 +231,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *TheOnlyDest = BA->getBasicBlock();
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
-
+
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
if (IBI->getDestination(i) == TheOnlyDest)
TheOnlyDest = 0;
@@ -242,7 +242,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
IBI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
-
+
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
@@ -250,11 +250,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BB->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
}
-
+
return true;
}
}
-
+
return false;
}
@@ -321,10 +321,10 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
-
+
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
-
+
do {
I = DeadInsts.pop_back_val();
@@ -333,9 +333,9 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *OpV = I->getOperand(i);
I->setOperand(i, 0);
-
+
if (!OpV->use_empty()) continue;
-
+
// If the operand is an instruction that became dead as we nulled out the
// operand, and if it is 'trivially' dead, delete it in a future loop
// iteration.
@@ -343,7 +343,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
-
+
I->eraseFromParent();
} while (!DeadInsts.empty());
@@ -450,12 +450,12 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
-
+
// Remove the entries for Pred from the PHI nodes in BB, but do not simplify
// them down. This will leave us with single entry phi nodes and other phis
// that can be removed.
BB->removePredecessor(Pred, true);
-
+
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
@@ -486,10 +486,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PN->replaceAllUsesWith(NewVal);
PN->eraseFromParent();
}
-
+
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
-
+
// Zap anything that took the address of DestBB. Not doing this will give the
// address an invalid value.
if (DestBB->hasAddressTaken()) {
@@ -500,10 +500,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
BA->getType()));
BA->destroyConstant();
}
-
+
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
-
+
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
@@ -525,6 +525,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->eraseFromParent();
}
+/// CanMergeValues - Return true if we can choose one of these values to use
+/// in place of the other. Note that we will always choose the non-undef
+/// value to keep.
+static bool CanMergeValues(Value *First, Value *Second) {
+ return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
+
/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
/// almost-empty BB ending in an unconditional branch to Succ, into succ.
///
@@ -533,7 +540,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
- DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
<< Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
@@ -555,9 +562,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
BasicBlock *IBB = PN->getIncomingBlock(PI);
if (BBPreds.count(IBB) &&
- BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
+ !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
+ PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
<< IBB->getName() << "\n");
return false;
@@ -570,8 +578,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// one for BB, in which case this phi node will not prevent the merging
// of the block.
BasicBlock *IBB = PN->getIncomingBlock(PI);
- if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(Val, PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
<< "predecessor " << IBB->getName() << "\n");
return false;
@@ -583,6 +592,139 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
return true;
}
+typedef SmallVector<BasicBlock *, 16> PredBlockVector;
+typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+
+/// \brief Determines the value to use as the phi node input for a block.
+///
+/// Select between \p OldVal and any value that we know flows from \p BB
+/// to a particular phi on the basis of which one (if either) is not
+/// undef. Update IncomingValues based on the selected value.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+ IncomingValueMap &IncomingValues) {
+ if (!isa<UndefValue>(OldVal)) {
+ assert((!IncomingValues.count(BB) ||
+ IncomingValues.find(BB)->second == OldVal) &&
+ "Expected OldVal to match incoming value from BB!");
+
+ IncomingValues.insert(std::make_pair(BB, OldVal));
+ return OldVal;
+ }
+
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It != IncomingValues.end()) return It->second;
+
+ return OldVal;
+}
+
+/// \brief Create a map from block to value for the operands of a
+/// given phi.
+///
+/// Create a map from block to value for each non-undef value flowing
+/// into \p PN.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param IncomingValues [out] The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+ IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V))
+ IncomingValues.insert(std::make_pair(BB, V));
+ }
+}
+
+/// \brief Replace the incoming undef values to a phi with the values
+/// from a block-to-value map.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+ const IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V)) continue;
+
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It == IncomingValues.end()) continue;
+
+ PN->setIncomingValue(i, It->second);
+ }
+}
+
+/// \brief Replace a value flowing from a block to a phi with
+/// potentially multiple instances of that value flowing from the
+/// block's predecessors to the phi.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+ const PredBlockVector &BBPreds,
+ PHINode *PN) {
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ IncomingValueMap IncomingValues;
+
+ // We are merging two blocks - BB, and the block containing PN - and
+ // as a result we need to redirect edges from the predecessors of BB
+ // to go to the block containing PN, and update PN
+ // accordingly. Since we allow merging blocks in the case where the
+ // predecessor and successor blocks both share some predecessors,
+ // and where some of those common predecessors might have undef
+ // values flowing into PN, we want to rewrite those values to be
+ // consistent with the non-undef values.
+
+ gatherIncomingValuesToPhi(PN, IncomingValues);
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+ Value *PredVal = OldValPN->getIncomingValue(i);
+ Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ } else {
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
+ // Update existing incoming values in PN for this
+ // predecessor of BB.
+ BasicBlock *PredBB = BBPreds[i];
+ Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ }
+
+ replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
/// unconditional branch, and contains no instructions other than PHI nodes,
/// potential side-effect free intrinsics and the branch. If possible,
@@ -595,7 +737,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// We can't eliminate infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
if (BB == Succ) return false;
-
+
// Check to see if merging these blocks would cause conflicts for any of the
// phi nodes in BB or Succ. If not, we can safely merge.
if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
@@ -629,39 +771,21 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
-
+
if (isa<PHINode>(Succ->begin())) {
// If there is more than one pred of succ, and there are PHI nodes in
// the successor, then we need to add incoming edges for the PHI nodes
//
- const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
+ const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+
// Loop over all of the PHI nodes in the successor of BB.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- PN->addIncoming(OldValPN->getIncomingValue(i),
- OldValPN->getIncomingBlock(i));
- } else {
- // Add an incoming value for each of the new incoming values.
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
- PN->addIncoming(OldVal, BBPreds[i]);
- }
+
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
}
}
-
+
if (Succ->getSinglePredecessor()) {
// BB is the only predecessor of Succ, so Succ will end up with exactly
// the same predecessors BB had.
@@ -676,7 +800,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
PN->eraseFromParent();
}
}
-
+
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
@@ -784,7 +908,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// the final program then it is impossible for us to reliably enforce the
// preferred alignment.
if (GV->isWeakForLinker()) return Align;
-
+
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
// We can only increase the alignment of the global if it has no alignment
@@ -804,26 +928,27 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *TD) {
+ const DataLayout *DL) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
unsigned TrailZ = KnownZero.countTrailingOnes();
-
- // Avoid trouble with rediculously large TrailZ values, such as
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
+
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
+
// LLVM doesn't support alignments larger than this currently.
Align = std::min(Align, +Value::MaximumAlignment);
-
+
if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
-
+ Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
+
// We don't need to make any adjustment.
return Align;
}
@@ -832,14 +957,36 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
/// Dbg Intrinsic utilities
///
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+ // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ llvm::BasicBlock::InstListType::iterator PrevI(I);
+ if (PrevI != I->getParent()->getInstList().begin()) {
+ --PrevI;
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+ if (DVI->getValue() == I->getOperand(0) &&
+ DVI->getOffset() == 0 &&
+ DVI->getVariable() == DIVar)
+ return true;
+ }
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
+ if (LdStHasDebugValue(DIVar, SI))
+ return true;
+
Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -863,18 +1010,23 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
return true;
}
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
- Instruction *DbgVal =
+ if (LdStHasDebugValue(DIVar, LI))
+ return true;
+
+ Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
-
+
// Propagate any debug metadata from the store onto the dbg.value.
DebugLoc LIDL = LI->getDebugLoc();
if (!LIDL.isUnknown())
@@ -898,10 +1050,12 @@ bool llvm::LowerDbgDeclare(Function &F) {
if (Dbgs.empty())
return false;
- for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ // We only remove the dbg.declare intrinsic if all uses are
+ // converted to dbg.value intrinsics.
bool RemoveDDI = true;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
@@ -936,7 +1090,9 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
if (!DDI)
return false;
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
// Create a copy of the original DIDescriptor for user variable, appending
@@ -985,22 +1141,17 @@ bool llvm::removeUnreachableBlocks(Function &F) {
if (Reachable.count(I))
continue;
- // Remove the block as predecessor of all its reachable successors.
- // Unreachable successors don't matter as they'll soon be removed, too.
for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(I);
+ I->dropAllReferences();
+ }
- // Zap all instructions in this basic block.
- while (!I->empty()) {
- Instruction &Inst = I->back();
- if (!Inst.use_empty())
- Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
- I->getInstList().pop_back();
- }
+ for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
- --I;
- llvm::next(I)->eraseFromParent();
- }
return true;
}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 37819cc..6d5f16c 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -59,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
@@ -100,16 +101,16 @@ namespace {
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
- void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
};
}
+static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", true, false)
@@ -208,7 +209,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L);
+ Preheader = InsertPreheaderForLoop(L, this);
if (Preheader) {
++NumInserted;
Changed = true;
@@ -367,7 +368,7 @@ ReprocessLoop:
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *PreheaderBB;
if (!Header->isLandingPad()) {
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- this);
+ PP);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", this, NewBBs);
+ ".split-lp", PP, NewBBs);
PreheaderBB = NewBBs[0];
}
@@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
// right after some 'outside block' block. This prevents the preheader from
// being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
+void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
// Check to see if NewBB is already well placed.
Function::iterator BBI = NewBB; --BBI;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 9ec84d7..f66b54d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -61,6 +61,8 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
namespace {
class LowerInvoke : public FunctionPass {
+ const TargetMachine *TM;
+
// Used for both models.
Constant *AbortFn;
@@ -70,15 +72,12 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in TLI to grab the target's jmp_buf size and alignment
- const TargetLowering *TLI;
-
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetLowering *tli = NULL,
+ explicit LowerInvoke(const TargetMachine *TM = 0,
bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) {
+ : FunctionPass(ID), TM(TM),
+ useExpensiveEHSupport(useExpensiveEHSupport) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -108,12 +107,9 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
- return new LowerInvoke(TLI, ExpensiveEHSupport);
-}
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
bool useExpensiveEHSupport) {
- return new LowerInvoke(TLI, useExpensiveEHSupport);
+ return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
@@ -122,6 +118,7 @@ bool LowerInvoke::doInitialization(Module &M) {
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -430,6 +427,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 61b3965..ebd7db6 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -27,6 +28,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
+
PromotePass() : FunctionPass(ID) {
initializePromotePassPass(*PassRegistry::getPassRegistry());
}
@@ -62,6 +64,7 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
+ const DataLayout *DL = getAnalysisIfAvailable<DataLayout>();
while (1) {
Allocas.clear();
@@ -70,12 +73,12 @@ bool PromotePass::runOnFunction(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
+ if (isAllocaPromotable(AI, DL))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT);
+ PromoteMemToReg(Allocas, DT, DL);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 3716f58..c370453 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -53,7 +53,7 @@ namespace {
}
bool runOnModule(Module &M) {
- static const char *metaNames[] = {
+ static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
"wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index d090b48..ff6e6f9 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -62,3 +63,20 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
appendToGlobalArray("llvm.global_dtors", M, F, Priority);
}
+
+GlobalVariable *
+llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set,
+ bool CompilerUsed) {
+ const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+ if (!GV || !GV->hasInitializer())
+ return GV;
+
+ const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
+ Value *Op = Init->getOperand(I);
+ GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ Set.insert(G);
+ }
+ return GV;
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index de335ec..6910180 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,9 +27,10 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -45,6 +46,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -56,360 +58,560 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-namespace llvm {
-template<>
-struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
- typedef std::pair<BasicBlock*, unsigned> EltTy;
- static inline EltTy getEmptyKey() {
- return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+namespace {
+
+struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
+ const DataLayout *DL;
+
+ SmallVector<BasicBlock *, 32> DefiningBlocks;
+ SmallVector<BasicBlock *, 32> UsingBlocks;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ Type *AllocaTy;
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ AllocaInfo(const DataLayout *DL) : DL(DL) {}
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ DeadInsts.clear();
+ AllocaTy = 0;
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
}
- static inline EltTy getTombstoneKey() {
- return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+
+ /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+ /// by the rest of the pass to reason about the uses of this alloca.
+ bool analyzeAlloca(AllocaInst &AI) {
+ clear();
+
+ AllocaTy = AI.getAllocatedType();
+ enqueueUsers(AI);
+
+ // Walk queued up uses in the worklist to handle nested uses.
+ while (!UseWorklist.empty()) {
+ U = UseWorklist.pop_back_val();
+ Instruction &I = *cast<Instruction>(U->getUser());
+ if (!visit(I))
+ return false; // Propagate failure to promote up.
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = I.getParent();
+ else if (OnlyBlock != I.getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(&AI);
+ return true;
}
- static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
- using llvm::hash_value;
- return static_cast<unsigned>(hash_value(Val));
+
+private:
+ // Befriend the base class so it can call through private visitor methods.
+ friend class InstVisitor<AllocaInfo, bool>;
+
+ /// \brief A use pointer that is non-null when visiting uses.
+ Use *U;
+
+ /// \brief A worklist for recursively visiting all uses of an alloca.
+ SmallVector<Use *, 8> UseWorklist;
+
+ /// \brief A set for preventing cyclic visitation.
+ SmallPtrSet<Use *, 8> VisitedUses;
+
+ void enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+ ++UI)
+ if (VisitedUses.insert(&UI.getUse()))
+ UseWorklist.push_back(&UI.getUse());
}
- static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
- return LHS == RHS;
+
+ bool visitLoadInst(LoadInst &LI) {
+ if (LI.isVolatile() || LI.getType() != AllocaTy)
+ return false;
+
+ // Keep track of variable reads.
+ UsingBlocks.push_back(LI.getParent());
+ AllocaPointerVal = &LI;
+ return true;
}
-};
-}
-/// isAllocaPromotable - Return true if this alloca is legal for promotion.
-/// This is true if there are only loads and stores to the alloca.
-///
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
- // FIXME: If the memory unit is of pointer or integer type, we can permit
- // assignments to subsections of the memory unit.
-
- // Only allow direct and non-volatile loads and stores...
- for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) { // Loop over all of the uses of the alloca
- const User *U = *UI;
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Note that atomic loads can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (LI->isVolatile())
- return false;
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI)
- return false; // Don't allow a store OF the AI, only INTO the AI.
- // Note that atomic stores can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (SI->isVolatile())
- return false;
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- return false;
- } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
- return false;
- if (!onlyUsedByLifetimeMarkers(BCI))
- return false;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
- return false;
- if (!GEPI->hasAllZeroIndices())
- return false;
- if (!onlyUsedByLifetimeMarkers(GEPI))
- return false;
- } else {
+ bool visitStoreInst(StoreInst &SI) {
+ if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
+ SI.getValueOperand()->getType() != AllocaTy)
return false;
+
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI.getParent());
+ AllocaPointerVal = SI.getOperand(0);
+ OnlyStore = &SI;
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ if (BC.use_empty())
+ DeadInsts.push_back(&BC);
+ else
+ enqueueUsers(BC);
+ return true;
+ }
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty()) {
+ DeadInsts.push_back(&GEPI);
+ return true;
}
+
+ enqueueUsers(GEPI);
+
+ return GEPI.hasAllZeroIndices();
}
- return true;
-}
+ // We can promote through debug info intrinsics as they don't alter the
+ // value stored in memory.
+ bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {
+ DeadInsts.push_back(&I);
+ return true;
+ }
-namespace {
- struct AllocaInfo;
-
- // Data package used by RenamePass()
- class RenamePassData {
- public:
- typedef std::vector<Value *> ValVector;
-
- RenamePassData() : BB(NULL), Pred(NULL), Values() {}
- RenamePassData(BasicBlock *B, BasicBlock *P,
- const ValVector &V) : BB(B), Pred(P), Values(V) {}
- BasicBlock *BB;
- BasicBlock *Pred;
- ValVector Values;
-
- void swap(RenamePassData &RHS) {
- std::swap(BB, RHS.BB);
- std::swap(Pred, RHS.Pred);
- Values.swap(RHS.Values);
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ switch (II.getIntrinsicID()) {
+ default:
+ return false;
+
+ // Lifetime intrinsics don't preclude promoting the memory to a register.
+ // FIXME: We should use these to promote to undef when outside of a valid
+ // lifetime.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ DeadInsts.push_back(&II);
+ return true;
}
- };
-
- /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
- /// load/store instructions in the block that directly load or store an alloca.
+ }
+
+ // The fallback is that the alloca cannot be promoted.
+ bool visitInstruction(Instruction &I) { return false; }
+};
+
+// Data package used by RenamePass()
+class RenamePassData {
+public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
+ : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+};
+
+/// \brief This assigns and keeps a per-bb relative ordering of load/store
+/// instructions in the block that directly load or store an alloca.
+///
+/// This functionality is important because it avoids scanning large basic
+/// blocks multiple times when promoting many allocas in the same block.
+class LargeBlockInfo {
+ /// \brief For each instruction that we track, keep the index of the
+ /// instruction.
///
- /// This functionality is important because it avoids scanning large basic
- /// blocks multiple times when promoting many allocas in the same block.
- class LargeBlockInfo {
- /// InstNumbers - For each instruction that we track, keep the index of the
- /// instruction. The index starts out as the number of the instruction from
- /// the start of the block.
- DenseMap<const Instruction *, unsigned> InstNumbers;
- public:
-
- /// isInterestingInstruction - This code only looks at accesses to allocas.
- static bool isInterestingInstruction(const Instruction *I) {
- return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
- (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
- }
-
- /// getInstructionIndex - Get or calculate the index of the specified
- /// instruction.
- unsigned getInstructionIndex(const Instruction *I) {
- assert(isInterestingInstruction(I) &&
- "Not a load/store to/from an alloca?");
-
- // If we already have this instruction number, return it.
- DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
- if (It != InstNumbers.end()) return It->second;
-
- // Scan the whole block to get the instruction. This accumulates
- // information for every interesting instruction in the block, in order to
- // avoid gratuitus rescans.
- const BasicBlock *BB = I->getParent();
- unsigned InstNo = 0;
- for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
- BBI != E; ++BBI)
- if (isInterestingInstruction(BBI))
- InstNumbers[BBI] = InstNo++;
- It = InstNumbers.find(I);
-
- assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ /// The index starts out as the number of the instruction from the start of
+ /// the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+
+public:
+
+ /// This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// Get or calculate the index of the specified instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end())
return It->second;
- }
-
- void deleteValue(const Instruction *I) {
- InstNumbers.erase(I);
- }
-
- void clear() {
- InstNumbers.clear();
- }
- };
-
- struct PromoteMem2Reg {
- /// Allocas - The alloca instructions being promoted.
- ///
- std::vector<AllocaInst*> Allocas;
- DominatorTree &DT;
- DIBuilder *DIB;
-
- /// AST - An AliasSetTracker object to update. If null, don't update it.
- ///
- AliasSetTracker *AST;
-
- /// AllocaLookup - Reverse mapping of Allocas.
- ///
- DenseMap<AllocaInst*, unsigned> AllocaLookup;
-
- /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify
- /// some Phi nodes as we iterate over it, so it should have deterministic
- /// iterators. We could use a MapVector, but since we already maintain a
- /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
- /// more efficient (also supports removal).
- ///
- DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
-
- /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
- /// it corresponds to.
- DenseMap<PHINode*, unsigned> PhiToAllocaMap;
-
- /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
- /// each alloca that is of pointer type, we keep track of what to copyValue
- /// to the inserted PHI nodes here.
- ///
- std::vector<Value*> PointerAllocaValues;
-
- /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
- /// intrinsic that describes it, if any, so that we can convert it to a
- /// dbg.value intrinsic if the alloca gets promoted.
- SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
-
- /// Visited - The set of basic blocks the renamer has already visited.
- ///
- SmallPtrSet<BasicBlock*, 16> Visited;
-
- /// BBNumbers - Contains a stable numbering of basic blocks to avoid
- /// non-determinstic behavior.
- DenseMap<BasicBlock*, unsigned> BBNumbers;
-
- /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
- DenseMap<DomTreeNode*, unsigned> DomLevels;
-
- /// BBNumPreds - Lazily compute the number of predecessors a block has.
- DenseMap<const BasicBlock*, unsigned> BBNumPreds;
- public:
- PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- AliasSetTracker *ast)
- : Allocas(A), DT(dt), DIB(0), AST(ast) {}
- ~PromoteMem2Reg() {
- delete DIB;
- }
- void run();
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
+ ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
- /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
- ///
- bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
- return DT.dominates(BB1, BB2);
- }
+ void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
- private:
- void RemoveFromAllocasList(unsigned &AllocaIdx) {
- Allocas[AllocaIdx] = Allocas.back();
- Allocas.pop_back();
- --AllocaIdx;
- }
+ void clear() { InstNumbers.clear(); }
+};
+
+struct PromoteMem2Reg {
+ /// The alloca instructions being promoted.
+ std::vector<AllocaInst *> Allocas;
+ DominatorTree &DT;
+ DIBuilder DIB;
+ const DataLayout *DL;
+
+ /// An AliasSetTracker object to update. If null, don't update it.
+ AliasSetTracker *AST;
+
+ /// Reverse mapping of Allocas.
+ DenseMap<AllocaInst *, unsigned> AllocaLookup;
+
+ /// \brief The PhiNodes we're adding.
+ ///
+ /// That map is used to simplify some Phi nodes as we iterate over it, so
+ /// it should have deterministic iterators. We could use a MapVector, but
+ /// since we already maintain a map from BasicBlock* to a stable numbering
+ /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+ DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+ /// For each PHI node, keep track of which entry in Allocas it corresponds
+ /// to.
+ DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+ /// If we are updating an AliasSetTracker, then for each alloca that is of
+ /// pointer type, we keep track of what to copyValue to the inserted PHI
+ /// nodes here.
+ std::vector<Value *> PointerAllocaValues;
+
+ /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// describes it, if any, so that we can convert it to a dbg.value
+ /// intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+
+ /// The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ /// Contains a stable numbering of basic blocks to avoid non-deterministic
+ /// behavior.
+ DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+ /// Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode *, unsigned> DomLevels;
+
+ /// Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+ PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ const DataLayout *DL, AliasSetTracker *AST)
+ : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+ DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {}
+
+ void run();
+
+private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+ return NP - 1;
+ }
- unsigned getNumPreds(const BasicBlock *BB) {
- unsigned &NP = BBNumPreds[BB];
- if (NP == 0)
- NP = std::distance(pred_begin(BB), pred_end(BB))+1;
- return NP-1;
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end of anonymous namespace
+
+/// \brief Walk a small vector of dead instructions and recursively remove them
+/// and subsequently dead instructions.
+///
+/// This is only valid to call on dead instructions using an alloca which is
+/// promotable, as we leverage that assumption to delete them faster.
+static void removeDeadInstructions(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ while (!DeadInsts.empty()) {
+ Instruction *I = DeadInsts.pop_back_val();
+
+ // Don't delete the alloca itself.
+ if (I == AI)
+ continue;
+
+ // Note that we open code the deletion algorithm here because we know
+ // a priori that all of the instructions using an alloca that reaches here
+ // are trivially dead when their use list becomes empty (the only risks are
+ // lifetime markers which we specifically want to nuke). By coding it here
+ // we can skip the triviality test and be more efficient.
+ //
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
+ ++OI) {
+ Instruction *Op = dyn_cast<Instruction>(*OI);
+ if (!Op)
+ continue;
+
+ OI->set(0);
+ if (!Op->use_empty())
+ continue;
+
+ DeadInsts.push_back(Op);
}
+ I->eraseFromParent();
+ }
+}
+
+/// \brief Rewrite as many loads as possible given a single store.
+///
+/// When there is only a single store, we can use the domtree to trivially
+/// replace all of the dominated loads with the stored value. Do so, and return
+/// true if this has successfully promoted the alloca entirely. If this returns
+/// false there were some loads which were not dominated by the single store
+/// and thus must be phi-ed with undef. We fall back to the standard alloca
+/// promotion algorithm in that case.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ DominatorTree &DT,
+ AliasSetTracker *AST) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
- void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info);
- void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
- SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
-
- void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI);
- void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI);
-
- void RenamePass(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector &IncVals,
- std::vector<RenamePassData> &Worklist);
- bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
- };
-
- struct AllocaInfo {
- SmallVector<BasicBlock*, 32> DefiningBlocks;
- SmallVector<BasicBlock*, 32> UsingBlocks;
-
- StoreInst *OnlyStore;
- BasicBlock *OnlyBlock;
- bool OnlyUsedInOneBlock;
-
- Value *AllocaPointerVal;
- DbgDeclareInst *DbgDeclare;
-
- void clear() {
- DefiningBlocks.clear();
- UsingBlocks.clear();
- OnlyStore = 0;
- OnlyBlock = 0;
- OnlyUsedInOneBlock = true;
- AllocaPointerVal = 0;
- DbgDeclare = 0;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
}
-
- /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
- /// ivars.
- void AnalyzeAlloca(AllocaInst *AI) {
- clear();
-
- // As we scan the uses of the alloca instruction, keep track of stores,
- // and decide whether all of the loads and stores to the alloca are within
- // the same basic block.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI->getParent());
- AllocaPointerVal = SI->getOperand(0);
- OnlyStore = SI;
- } else {
- LoadInst *LI = cast<LoadInst>(User);
- // Otherwise it must be a load instruction, keep track of variable
- // reads.
- UsingBlocks.push_back(LI->getParent());
- AllocaPointerVal = LI;
- }
-
- if (OnlyUsedInOneBlock) {
- if (OnlyBlock == 0)
- OnlyBlock = User->getParent();
- else if (OnlyBlock != User->getParent())
- OnlyUsedInOneBlock = false;
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
}
+
+ } else if (LI->getParent() != StoreBB &&
+ !DT.dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
}
-
- DbgDeclare = FindAllocaDbgDeclare(AI);
}
- };
- typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (!Info.UsingBlocks.empty())
+ return false; // If not, we'll have to fall back for the remainder.
- struct DomTreeNodeCompare {
- bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
- return LHS.second < RHS.second;
- }
- };
-} // end of anonymous namespace
-
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
- // Knowing that this alloca is promotable, we know that it's safe to kill all
- // instructions except for load and store.
-
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE;) {
- Instruction *I = cast<Instruction>(*UI);
- ++UI;
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
+ DDI->eraseFromParent();
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST)
+ AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+ return true;
+}
+
+namespace {
+/// This is a helper predicate used to search by the first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
+ const std::pair<unsigned, StoreInst *> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+}
+
+/// Many allocas are only used within a single basic block. If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// If a load is not preceded by any store in the block, it is rewritten to use
+/// undef. This is necessary in cases where, due to control flow, the
+/// alloca is potentially undefined on some control flow paths. e.g. code like
+/// this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ AliasSetTracker *AST) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
+ ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(),
+ StoreIndexSearchPredicate());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI)
continue;
- if (!I->getType()->isVoidTy()) {
- // The only users of this bitcast/GEP instruction are lifetime intrinsics.
- // Follow the use/def chain to erase them now instead of leaving it for
- // dead code elimination later.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE;) {
- Instruction *Inst = cast<Instruction>(*UI);
- ++UI;
- Inst->eraseFromParent();
- }
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+ StoreIndexSearchPredicate());
+
+ if (I == StoresByIndex.begin())
+ // If there is no store before this load, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // Otherwise, there was a store before this load, the load takes its value.
+ LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
+
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
}
- I->eraseFromParent();
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
}
+
+ if (AST)
+ AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ DDI->eraseFromParent();
+
+ ++NumLocalPromoted;
}
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- if (AST) PointerAllocaValues.resize(Allocas.size());
+ if (AST)
+ PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
- AllocaInfo Info;
+ AllocaInfo Info(DL);
LargeBlockInfo LBI;
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
- assert(isAllocaPromotable(AI) &&
- "Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
- removeLifetimeIntrinsicUsers(AI);
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ bool Good = Info.analyzeAlloca(*AI);
+ (void)Good;
+ assert(Good && "Cannot promote non-promotable alloca!");
+
+ // Nuke all of the dead instructions.
+ removeDeadInstructions(AI, Info.DeadInsts);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
- if (AST) AST->deleteValue(AI);
+ if (AST)
+ AST->deleteValue(AI);
AI->eraseFromParent();
// Remove the alloca from the Allocas list, since it has been processed
@@ -417,83 +619,31 @@ void PromoteMem2Reg::run() {
++NumDeadAlloca;
continue;
}
-
- // Calculate the set of read and write-locations for each alloca. This is
- // analogous to finding the 'uses' and 'definitions' of each variable.
- Info.AnalyzeAlloca(AI);
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- RewriteSingleStoreAlloca(AI, Info, LBI);
-
- // Finally, after the scan, check to see if the store is all that is left.
- if (Info.UsingBlocks.empty()) {
- // Record debuginfo for the store and remove the declaration's
- // debuginfo.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- if (!DIB)
- DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
- DDI->eraseFromParent();
- }
- // Remove the (now dead) store and alloca.
- Info.OnlyStore->eraseFromParent();
- LBI.deleteValue(Info.OnlyStore);
-
- if (AST) AST->deleteValue(AI);
- AI->eraseFromParent();
- LBI.deleteValue(AI);
-
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
-
++NumSingleStore;
continue;
}
}
-
+
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock) {
- PromoteSingleBlockAlloca(AI, Info, LBI);
-
- // Finally, after the scan, check to see if the stores are all that is
- // left.
- if (Info.UsingBlocks.empty()) {
-
- // Remove the (now dead) stores and alloca.
- while (!AI->use_empty()) {
- StoreInst *SI = cast<StoreInst>(AI->use_back());
- // Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- if (!DIB)
- DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
- }
- SI->eraseFromParent();
- LBI.deleteValue(SI);
- }
-
- if (AST) AST->deleteValue(AI);
- AI->eraseFromParent();
- LBI.deleteValue(AI);
-
- // The alloca has been processed, move on.
- RemoveFromAllocasList(AllocaNum);
-
- // The alloca's debuginfo can be removed as well.
- if (DbgDeclareInst *DDI = Info.DbgDeclare)
- DDI->eraseFromParent();
+ promoteSingleBlockAlloca(AI, Info, LBI, AST);
- ++NumLocalPromoted;
- continue;
- }
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
}
// If we haven't computed dominator tree levels, do so now.
if (DomLevels.empty()) {
- SmallVector<DomTreeNode*, 32> Worklist;
+ SmallVector<DomTreeNode *, 32> Worklist;
DomTreeNode *Root = DT.getRootNode();
DomLevels[Root] = 0;
@@ -522,10 +672,11 @@ void PromoteMem2Reg::run() {
// stored into the alloca.
if (AST)
PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-
+
// Remember the dbg.declare intrinsic describing this alloca, if any.
- if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
-
+ if (Info.DbgDeclare)
+ AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -540,8 +691,7 @@ void PromoteMem2Reg::run() {
return; // All of the allocas must have been trivial!
LBI.clear();
-
-
+
// Set the incoming values for the basic block to be null values for all of
// the alloca's. We do this in case there is a load of a value that has not
// been stored yet. In this case, it will get this null value.
@@ -562,7 +712,7 @@ void PromoteMem2Reg::run() {
// RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
} while (!RenamePassWorkList.empty());
-
+
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
Visited.clear();
@@ -575,7 +725,8 @@ void PromoteMem2Reg::run() {
// tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
- if (AST) AST->deleteValue(A);
+ if (AST)
+ AST->deleteValue(A);
A->eraseFromParent();
}
@@ -591,13 +742,15 @@ void PromoteMem2Reg::run() {
bool EliminatedAPHI = true;
while (EliminatedAPHI) {
EliminatedAPHI = false;
-
+
// Iterating over NewPhiNodes is deterministic, so it is safe to try to
// simplify and RAUW them as we go. If it was not, we could add uses to
// the values we replace with in a non deterministic order, thus creating
// non deterministic def->use chains.
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
- NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E;) {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
@@ -613,15 +766,17 @@ void PromoteMem2Reg::run() {
++I;
}
}
-
+
// At this point, the renamer has added entries to PHI nodes for all reachable
// code. Unfortunately, there may be unreachable blocks which the renamer
// hasn't traversed. If this is the case, the PHI nodes may not
// have incoming values for all predecessors. Loop over all PHI nodes we have
// created, inserting undef values if they are missing any incoming values.
//
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
- NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E; ++I) {
// We want to do this once per basic block. As such, only process a block
// when we find the PHI that is the first entry in the block.
PHINode *SomePHI = I->second;
@@ -636,21 +791,20 @@ void PromoteMem2Reg::run() {
continue;
// Get the preds for BB.
- SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
-
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
std::sort(Preds.begin(), Preds.end());
-
+
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
- SmallVector<BasicBlock*, 16>::iterator EntIt =
- std::lower_bound(Preds.begin(), Preds.end(),
- SomePHI->getIncomingBlock(i));
- assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
// Remove the entry
@@ -670,39 +824,41 @@ void PromoteMem2Reg::run() {
SomePHI->addIncoming(UndefVal, Preds[pred]);
}
}
-
+
NewPhiNodes.clear();
}
+/// \brief Determine which blocks the value is live in.
+///
+/// These are blocks which lead to uses. Knowing this allows us to avoid
+/// inserting PHI nodes into blocks which don't lead to uses (thus, the
+/// inserted phi nodes would be dead).
+void PromoteMem2Reg::ComputeLiveInBlocks(
+ AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
-/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
-/// are blocks which lead to uses. Knowing this allows us to avoid inserting
-/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
-/// would be dead).
-void PromoteMem2Reg::
-ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
- SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
-
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
- Info.UsingBlocks.end());
-
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
// If any of the using blocks is also a definition block, check to see if the
// definition occurs before or after the use. If it happens before the use,
// the value isn't really live-in.
for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
BasicBlock *BB = LiveInBlockWorklist[i];
- if (!DefBlocks.count(BB)) continue;
-
+ if (!DefBlocks.count(BB))
+ continue;
+
// Okay, this is a block that both uses and defines the value. If the first
// reference to the alloca is a def (store), then we know it isn't live-in.
- for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ for (BasicBlock::iterator I = BB->begin();; ++I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (SI->getOperand(1) != AI) continue;
-
+ if (SI->getOperand(1) != AI)
+ continue;
+
// We found a store to the alloca before a load. The alloca is not
// actually live-in here.
LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
@@ -710,73 +866,86 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
--i, --e;
break;
}
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getOperand(0) != AI) continue;
-
+ if (LI->getOperand(0) != AI)
+ continue;
+
// Okay, we found a load before a store to the alloca. It is actually
// live into this block.
break;
}
}
}
-
+
// Now that we have a set of blocks where the phi is live-in, recursively add
// their predecessors until we find the full region the value is live.
while (!LiveInBlockWorklist.empty()) {
BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
+
// The block really is live in here, insert it into the set. If already in
// the set, then it has already been processed.
if (!LiveInBlocks.insert(BB))
continue;
-
+
// Since the value is live into BB, it is either defined in a predecessor or
// live into it to. Add the preds to the worklist unless they are a
// defining block.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
-
+
// The value is not live into a predecessor if it defines the value.
if (DefBlocks.count(P))
continue;
-
+
// Otherwise it is, add to the worklist.
LiveInBlockWorklist.push_back(P);
}
}
}
-/// DetermineInsertionPoint - At this point, we're committed to promoting the
-/// alloca using IDF's, and the standard SSA construction algorithm. Determine
-/// which blocks need phi nodes and see if we can optimize out some work by
-/// avoiding insertion of dead phi nodes.
+namespace {
+typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+
+struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+};
+} // end anonymous namespace
+
+/// At this point, we're committed to promoting the alloca using IDF's, and the
+/// standard SSA construction algorithm. Determine which blocks need phi nodes
+/// and see if we can optimize out some work by avoiding insertion of dead phi
+/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) {
// Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
// Determine which blocks the value is live in. These are blocks which lead
// to uses.
- SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ typedef std::priority_queue<DomTreeNodePair,
+ SmallVector<DomTreeNodePair, 32>,
DomTreeNodeCompare> IDFPriorityQueue;
IDFPriorityQueue PQ;
- for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
- E = DefBlocks.end(); I != E; ++I) {
+ for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end();
+ I != E; ++I) {
if (DomTreeNode *Node = DT.getNode(*I))
PQ.push(std::make_pair(Node, DomLevels[Node]));
}
- SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode*, 32> Visited;
- SmallVector<DomTreeNode*, 32> Worklist;
+ SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode *, 32> Visited;
+ SmallVector<DomTreeNode *, 32> Worklist;
while (!PQ.empty()) {
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
@@ -836,179 +1005,22 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
-/// RewriteSingleStoreAlloca - If there is only a single store to this value,
-/// replace any loads of it that are directly dominated by the definition with
-/// the value stored.
-void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
- AllocaInfo &Info,
- LargeBlockInfo &LBI) {
- StoreInst *OnlyStore = Info.OnlyStore;
- bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
- BasicBlock *StoreBB = OnlyStore->getParent();
- int StoreIndex = -1;
-
- // Clear out UsingBlocks. We will reconstruct it here if needed.
- Info.UsingBlocks.clear();
-
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
- Instruction *UserInst = cast<Instruction>(*UI++);
- if (!isa<LoadInst>(UserInst)) {
- assert(UserInst == OnlyStore && "Should only have load/stores");
- continue;
- }
- LoadInst *LI = cast<LoadInst>(UserInst);
-
- // Okay, if we have a load from the alloca, we want to replace it with the
- // only value stored to the alloca. We can do this if the value is
- // dominated by the store. If not, we use the rest of the mem2reg machinery
- // to insert the phi nodes as needed.
- if (!StoringGlobalVal) { // Non-instructions are always dominated.
- if (LI->getParent() == StoreBB) {
- // If we have a use that is in the same block as the store, compare the
- // indices of the two instructions to see which one came first. If the
- // load came before the store, we can't handle it.
- if (StoreIndex == -1)
- StoreIndex = LBI.getInstructionIndex(OnlyStore);
-
- if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
- // Can't handle this load, bail out.
- Info.UsingBlocks.push_back(StoreBB);
- continue;
- }
-
- } else if (LI->getParent() != StoreBB &&
- !dominates(StoreBB, LI->getParent())) {
- // If the load and store are in different blocks, use BB dominance to
- // check their relationships. If the store doesn't dom the use, bail
- // out.
- Info.UsingBlocks.push_back(LI->getParent());
- continue;
- }
- }
-
- // Otherwise, we *can* safely rewrite this load.
- Value *ReplVal = OnlyStore->getOperand(0);
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
- LI->replaceAllUsesWith(ReplVal);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-}
-
-namespace {
-
-/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
-/// first element of a pair.
-struct StoreIndexSearchPredicate {
- bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
- const std::pair<unsigned, StoreInst*> &RHS) {
- return LHS.first < RHS.first;
- }
-};
-
-}
-
-/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
-/// block. If this is the case, avoid traversing the CFG and inserting a lot of
-/// potentially useless PHI nodes by just performing a single linear pass over
-/// the basic block using the Alloca.
-///
-/// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
+/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
-void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI) {
- // The trickiest case to handle is when we have large blocks. Because of this,
- // this code is optimized assuming that large blocks happen. This does not
- // significantly pessimize the small block case. This uses LargeBlockInfo to
- // make it efficient to get the index of various operations in the block.
-
- // Clear out UsingBlocks. We will reconstruct it here if needed.
- Info.UsingBlocks.clear();
-
- // Walk the use-def list of the alloca, getting the locations of all stores.
- typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
- StoresByIndexTy StoresByIndex;
-
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
- StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
-
- // If there are no stores to the alloca, just replace any loads with undef.
- if (StoresByIndex.empty()) {
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LBI.deleteValue(LI);
- LI->eraseFromParent();
- }
- return;
- }
-
- // Sort the stores by their index, making it efficient to do a lookup with a
- // binary search.
- std::sort(StoresByIndex.begin(), StoresByIndex.end());
-
- // Walk all of the loads from this alloca, replacing them with the nearest
- // store above them, if any.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI++);
- if (!LI) continue;
-
- unsigned LoadIdx = LBI.getInstructionIndex(LI);
-
- // Find the nearest store that has a lower than this load.
- StoresByIndexTy::iterator I =
- std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
- StoreIndexSearchPredicate());
-
- // If there is no store before this load, then we can't promote this load.
- if (I == StoresByIndex.begin()) {
- // Can't handle this load, bail out.
- Info.UsingBlocks.push_back(LI->getParent());
- continue;
- }
-
- // Otherwise, there was a store before this load, the load takes its value.
- --I;
- LI->replaceAllUsesWith(I->second->getOperand(0));
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-}
-
-// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
-// Alloca returns true if there wasn't already a phi-node for that variable
-//
+/// Returns true if there wasn't already a phi-node for that variable
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
unsigned &Version) {
// Look up the basic-block in question.
PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
// If the BB already has a phi node added for the i'th alloca then we're done!
- if (PN) return false;
+ if (PN)
+ return false;
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
@@ -1019,10 +1031,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
return true;
}
-// RenamePass - Recursively traverse the CFG of the function, renaming loads and
-// stores to the allocas which we are promoting. IncomingVals indicates what
-// value each Alloca contains on exit from the predecessor block Pred.
-//
+/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncomingVals,
std::vector<RenamePassData> &Worklist) {
@@ -1040,48 +1053,49 @@ NextIteration:
// inserted by this pass of mem2reg will have the same number of incoming
// operands so far. Remember this count.
unsigned NewPHINumOperands = APN->getNumOperands();
-
- unsigned NumEdges = 0;
- for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
- if (*I == BB)
- ++NumEdges;
+
+ unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
assert(NumEdges && "Must be at least one edge from Pred to BB!");
-
+
// Add entries for all the phis.
BasicBlock::iterator PNI = BB->begin();
do {
unsigned AllocaNo = PhiToAllocaMap[APN];
-
+
// Add N incoming values to the PHI node.
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
-
+
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
-
+
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
- if (APN == 0) break;
-
+ if (APN == 0)
+ break;
+
// Verify that it is missing entries. If not, it is not being inserted
// by this mem2reg invocation so we want to ignore it.
} while (APN->getNumOperands() == NewPHINumOperands);
}
}
-
+
// Don't revisit blocks.
- if (!Visited.insert(BB)) return;
+ if (!Visited.insert(BB))
+ return;
- for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
Instruction *I = II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
- if (!Src) continue;
-
- DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
- if (AI == AllocaLookup.end()) continue;
+ if (!Src)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end())
+ continue;
Value *V = IncomingVals[AI->second];
@@ -1094,30 +1108,29 @@ NextIteration:
// Delete this instruction and mark the name as the current holder of the
// value
AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
- if (!Dest) continue;
-
+ if (!Dest)
+ continue;
+
DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
if (ai == AllocaLookup.end())
continue;
-
+
// what value were we writing?
IncomingVals[ai->second] = SI->getOperand(0);
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
- if (!DIB)
- DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
- }
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
BB->getInstList().erase(SI);
}
}
// 'Recurse' to our successors.
succ_iterator I = succ_begin(BB), E = succ_end(BB);
- if (I == E) return;
+ if (I == E)
+ return;
// Keep track of the successors so we don't visit the same successor twice
- SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+ SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
// Handle the first successor without using the worklist.
VisitedSuccs.insert(*I);
@@ -1132,18 +1145,19 @@ NextIteration:
goto NextIteration;
}
-/// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate. This function does
-/// not modify the CFG of the function at all. All allocas must be from the
-/// same function.
-///
-/// If AST is specified, the specified tracker is updated to reflect changes
-/// made to the IR.
-///
-void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
- DominatorTree &DT, AliasSetTracker *AST) {
+bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) {
+ // We cast away constness because we re-use the non-const analysis that the
+ // actual promotion routine uses. While it is non-const, it doesn't actually
+ // mutate anything at this phase, and we discard the non-const results that
+ // promotion uses to mutate the alloca.
+ return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI));
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ const DataLayout *DL, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
- if (Allocas.empty()) return;
+ if (Allocas.empty())
+ return;
- PromoteMem2Reg(Allocas, DT, AST).run();
+ PromoteMem2Reg(Allocas, DT, DL, AST).run();
}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9d90fbe..fc85ef3 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -42,8 +42,6 @@ SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
}
-/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
@@ -53,14 +51,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
ProtoName = Name;
}
-/// HasValueForBlock - Return true if the SSAUpdater already has a value for
-/// the specified block.
bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
return getAvailableVals(AV).count(BB);
}
-/// AddAvailableValue - Indicate that a rewritten value is available in the
-/// specified block with the specified value.
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
assert(ProtoType != 0 && "Need to initialize SSAUpdater");
assert(ProtoType == V->getType() &&
@@ -68,8 +62,6 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
getAvailableVals(AV)[BB] = V;
}
-/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
-/// in ValueMapping for each predecessor block.
static bool IsEquivalentPHI(PHINode *PHI,
DenseMap<BasicBlock*, Value*> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
@@ -86,32 +78,11 @@ static bool IsEquivalentPHI(PHINode *PHI,
return true;
}
-/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
-/// live at the end of the specified block.
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
Value *Res = GetValueAtEndOfBlockInternal(BB);
return Res;
}
-/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
-/// is live in the middle of the specified block.
-///
-/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
-/// important case: if there is a definition of the rewritten value after the
-/// 'use' in BB. Consider code like this:
-///
-/// X1 = ...
-/// SomeBB:
-/// use(X)
-/// X2 = ...
-/// br Cond, SomeBB, OutBB
-///
-/// In this case, there are two values (X1 and X2) added to the AvailableVals
-/// set by the client of the rewriter, and those values are both live out of
-/// their respective blocks. However, the use of X happens in the *middle* of
-/// a block. Because of this, we need to insert a new PHI node in SomeBB to
-/// merge the appropriate values, and this value isn't live out of the block.
-///
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
@@ -203,8 +174,6 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return InsertedPHI;
}
-/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
-/// which use their value in the corresponding predecessor.
void SSAUpdater::RewriteUse(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -222,10 +191,6 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
-/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
-/// this version of the method can rewrite uses in the same block as a
-/// definition, because it assumes that all uses of a value are below any
-/// inserted values.
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -238,8 +203,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
U.set(V);
}
-/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
-/// specialized for SSAUpdater.
namespace llvm {
template<>
class SSAUpdaterTraits<SSAUpdater> {
@@ -342,10 +305,9 @@ public:
} // End llvm namespace
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it. If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it. If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
if (Value *V = AvailableVals[BB])
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c..c4c1423 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -40,12 +40,14 @@
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <map>
#include <set>
using namespace llvm;
+using namespace PatternMatch;
static cl::opt<unsigned>
PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
@@ -59,6 +61,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -84,7 +90,6 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
const DataLayout *const TD;
-
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -190,94 +195,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
-
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
-/// least one PHI node in it), check to see if the merge at this block is due
-/// to an "if condition". If so, return the boolean condition that determines
-/// which entry into BB will be taken. Also, return by references the block
-/// that will be entered from if the condition is true, and the block that will
-/// be entered if the condition is false.
-///
-/// This does no checking to see if the true/false blocks have large or unsavory
-/// instructions in them.
-static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
- BasicBlock *&IfFalse) {
- PHINode *SomePHI = cast<PHINode>(BB->begin());
- assert(SomePHI->getNumIncomingValues() == 2 &&
- "Function can only handle blocks with 2 predecessors!");
- BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
- BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
-
- // We can only handle branches. Other control flow will be lowered to
- // branches if possible anyway.
- BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
- BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
- if (Pred1Br == 0 || Pred2Br == 0)
- return 0;
-
- // Eliminate code duplication by ensuring that Pred1Br is conditional if
- // either are.
- if (Pred2Br->isConditional()) {
- // If both branches are conditional, we don't have an "if statement". In
- // reality, we could transform this case, but since the condition will be
- // required anyway, we stand no chance of eliminating it, so the xform is
- // probably not profitable.
- if (Pred1Br->isConditional())
- return 0;
-
- std::swap(Pred1, Pred2);
- std::swap(Pred1Br, Pred2Br);
- }
-
- if (Pred1Br->isConditional()) {
- // The only thing we have to watch out for here is to make sure that Pred2
- // doesn't have incoming edges from other blocks. If it does, the condition
- // doesn't dominate BB.
- if (Pred2->getSinglePredecessor() == 0)
- return 0;
-
- // If we found a conditional branch predecessor, make sure that it branches
- // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
- if (Pred1Br->getSuccessor(0) == BB &&
- Pred1Br->getSuccessor(1) == Pred2) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else if (Pred1Br->getSuccessor(0) == Pred2 &&
- Pred1Br->getSuccessor(1) == BB) {
- IfTrue = Pred2;
- IfFalse = Pred1;
- } else {
- // We know that one arm of the conditional goes to BB, so the other must
- // go somewhere unrelated, and this must not be an "if statement".
- return 0;
- }
-
- return Pred1Br->getCondition();
- }
-
- // Ok, if we got here, both predecessors end with an unconditional branch to
- // BB. Don't panic! If both blocks only have a single (identical)
- // predecessor, and THAT is a conditional branch, then we're all ok!
- BasicBlock *CommonPred = Pred1->getSinglePredecessor();
- if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
- return 0;
-
- // Otherwise, if this is a conditional branch, then we can use it!
- BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
- if (BI == 0) return 0;
-
- assert(BI->isConditional() && "Two successors but not conditional?");
- if (BI->getSuccessor(0) == Pred1) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else {
- IfTrue = Pred2;
- IfFalse = Pred1;
- }
- return BI->getCondition();
-}
-
-/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
/// given instruction, which is assumed to be safe to speculate. 1 means
/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
static unsigned ComputeSpeculationCost(const User *I) {
@@ -428,7 +346,24 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ Value *RHSVal;
+ ConstantInt *RHSC;
+
if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ // (x & ~2^z) == y --> x == y || x == y|2^z
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+ APInt Not = ~RHSC->getValue();
+ if (Not.isPowerOf2()) {
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(), C->getValue() | Not));
+ UsedICmps++;
+ return RHSVal;
+ }
+ }
+
UsedICmps++;
Vals.push_back(C);
return I->getOperand(0);
@@ -439,6 +374,13 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
ConstantRange Span =
ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ bool hasAdd =
+ match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)));
+ if (hasAdd)
+ Span = Span.subtract(RHSC->getValue());
+
// If this is an and/!= check then we want to optimize "x ugt 2" into
// x != 0 && x != 1.
if (!isEQ)
@@ -451,7 +393,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
- return I->getOperand(0);
+ return hasAdd ? RHSVal : I->getOperand(0);
}
return 0;
}
@@ -529,9 +471,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
- ICI->getPredicate() == ICmpInst::ICMP_NE) &&
- GetConstantInt(ICI->getOperand(1), TD))
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
@@ -1079,9 +1019,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
- // If we get here, we can hoist at least one instruction.
BasicBlock *BIParent = BI->getParent();
+ bool Changed = false;
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1096,6 +1036,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
I2->eraseFromParent();
+ Changed = true;
I1 = BB1_Itr++;
I2 = BB2_Itr++;
@@ -1115,7 +1056,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
HoistTerminator:
// It may not be possible to hoist an invoke.
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return true;
+ return Changed;
+
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+ return Changed;
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+ return Changed;
+ }
+ }
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
@@ -1332,6 +1289,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return 0;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return 0;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 10;
+ for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+ RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+ Instruction *CurI = &*RI;
+
+ // Could be calling an instruction that affects memory like free().
+ if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return 0;
+
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ // Found a previous store; make sure it stores to the same location.
+ if (SI && SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ else if (SI)
+ return 0; // Unknown store.
+ }
+
+ return 0;
+}
+
/// \brief Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1412,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1410,13 +1429,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
return false;
- if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
// Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
+ // used in BB. The transformation will prevent the operand from
// being sunk into the use block.
for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
@@ -1448,18 +1475,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
Value *OrigV = PN->getIncomingValueForBlock(BB);
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
// Skip PHIs which are trivial.
if (ThenV == OrigV)
continue;
HaveRewritablePHIs = true;
- ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
- if (!CE)
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if (!isSafeToSpeculativelyExecute(CE))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+ if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
@@ -1473,12 +1505,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (!HaveRewritablePHIs)
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
llvm::prior(ThenBB->end()));
@@ -3073,7 +3117,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
- Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
@@ -3216,7 +3265,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
E = ForwardingNodes.end(); I != E; ++I) {
PHINode *Phi = I->first;
- SmallVector<int,4> &Indexes = I->second;
+ SmallVectorImpl<int> &Indexes = I->second;
if (Indexes.size() < 2) continue;
@@ -3301,11 +3350,12 @@ static Constant *ConstantFold(Instruction *I,
/// at the common destination basic block, *CommonDest, for one of the case
/// destionations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
-static bool GetCaseResults(SwitchInst *SI,
- ConstantInt *CaseVal,
- BasicBlock *CaseDest,
- BasicBlock **CommonDest,
- SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+static bool
+GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3378,7 +3428,7 @@ namespace {
SwitchLookupTable(Module &M,
uint64_t TableSize,
ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *TD);
@@ -3425,7 +3475,7 @@ namespace {
SwitchLookupTable::SwitchLookupTable(Module &M,
uint64_t TableSize,
ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *TD)
: SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
@@ -3552,7 +3602,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
-/// for this switch, based on the number of caes, size of the table and the
+/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 41c207c..bf3442a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -119,7 +119,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
return 0;
D = ConstantInt::get(UseInst->getContext(),
- APInt(BitWidth, 1).shl(D->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704..094c201 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
+ // Do not do any of the following transformations if the fprintf return
+ // value is used, in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return 0;
+
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
- Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ return EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
- TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
@@ -1937,7 +1940,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
+ if (CI->isNoBuiltin()) return 0;
return Impl->optimizeCall(CI);
}
@@ -1947,3 +1950,53 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
}
}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// strchr:
+// * strchr(p, 0) -> p + strlen(p)
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
new file mode 100644
index 0000000..b98cb5b
--- /dev/null
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -0,0 +1,225 @@
+//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables, or to instrument some functions or global variables in a specific
+// way, based on a user-supplied list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+/// Represents a set of regular expressions. Regular expressions which are
+/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
+/// others are represented as a single pipe-separated regex in RegEx. The
+/// reason for doing so is efficiency; StringSet is much faster at matching
+/// literal strings than Regex.
+struct SpecialCaseList::Entry {
+ StringSet<> Strings;
+ Regex *RegEx;
+
+ Entry() : RegEx(0) {}
+
+ bool match(StringRef Query) const {
+ return Strings.count(Query) || (RegEx && RegEx->match(Query));
+ }
+};
+
+SpecialCaseList::SpecialCaseList(const StringRef Path) {
+ // Validate and open blacklist file.
+ if (Path.empty()) return;
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ report_fatal_error("Can't open blacklist file: " + Path + ": " +
+ EC.message());
+ }
+
+ init(File.get());
+}
+
+SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) {
+ init(MB);
+}
+
+void SpecialCaseList::init(const MemoryBuffer *MB) {
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(MB->getBuffer(), Lines, "\n\r");
+ StringMap<StringMap<std::string> > Regexps;
+ for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ if (SplitLine.second.empty()) {
+ // Missing ':' in the line.
+ report_fatal_error("malformed blacklist line: " + SplitLine.first);
+ }
+
+ std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
+ std::string Regexp = SplitRegexp.first;
+ StringRef Category = SplitRegexp.second;
+
+ // Backwards compatibility.
+ if (Prefix == "global-init") {
+ Prefix = "global";
+ Category = "init";
+ } else if (Prefix == "global-init-type") {
+ Prefix = "type";
+ Category = "init";
+ } else if (Prefix == "global-init-src") {
+ Prefix = "src";
+ Category = "init";
+ }
+
+ // See if we can store Regexp in Strings.
+ if (Regex::isLiteralERE(Regexp)) {
+ Entries[Prefix][Category].Strings.insert(Regexp);
+ continue;
+ }
+
+ // Replace * with .*
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string Error;
+ if (!CheckRE.isValid(Error)) {
+ report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+ ": " + Error);
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (!Regexps[Prefix][Category].empty())
+ Regexps[Prefix][Category] += "|";
+ Regexps[Prefix][Category] += "^" + Regexp + "$";
+ }
+
+ // Iterate through each of the prefixes, and create Regexs for them.
+ for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
+ E = Regexps.end();
+ I != E; ++I) {
+ for (StringMap<std::string>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
+ }
+ }
+}
+
+SpecialCaseList::~SpecialCaseList() {
+ for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+ E = Entries.end();
+ I != E; ++I) {
+ for (StringMap<Entry>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ delete II->second.RegEx;
+ }
+ }
+}
+
+bool SpecialCaseList::findCategory(const Function &F,
+ StringRef &Category) const {
+ return findCategory(*F.getParent(), Category) ||
+ findCategory("fun", F.getName(), Category);
+}
+
+bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
+ return isIn(*F.getParent(), Category) ||
+ inSectionCategory("fun", F.getName(), Category);
+}
+
+static StringRef GetGVTypeString(const GlobalVariable &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+bool SpecialCaseList::findCategory(const GlobalVariable &G,
+ StringRef &Category) const {
+ return findCategory(*G.getParent(), Category) ||
+ findCategory("global", G.getName(), Category) ||
+ findCategory("type", GetGVTypeString(G), Category);
+}
+
+bool SpecialCaseList::isIn(const GlobalVariable &G,
+ const StringRef Category) const {
+ return isIn(*G.getParent(), Category) ||
+ inSectionCategory("global", G.getName(), Category) ||
+ inSectionCategory("type", GetGVTypeString(G), Category);
+}
+
+bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const {
+ return findCategory("src", M.getModuleIdentifier(), Category);
+}
+
+bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
+ return inSectionCategory("src", M.getModuleIdentifier(), Category);
+}
+
+bool SpecialCaseList::findCategory(const StringRef Section,
+ const StringRef Query,
+ StringRef &Category) const {
+ StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
+ if (I == Entries.end()) return false;
+
+ for (StringMap<Entry>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ if (II->getValue().match(Query)) {
+ Category = II->first();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SpecialCaseList::inSectionCategory(const StringRef Section,
+ const StringRef Query,
+ const StringRef Category) const {
+ StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
+ if (I == Entries.end()) return false;
+ StringMap<Entry>::const_iterator II = I->second.find(Category);
+ if (II == I->second.end()) return false;
+
+ return II->getValue().match(Query);
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 5812d46..c3df215 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..457fc80 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -22,14 +22,22 @@ using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
ValueToValueMapTy::iterator I = VM.find(V);
// If the value already exists in the map, use it.
if (I != VM.end() && I->second) return I->second;
+ // If we have a materializer and it can materialize a value, use that.
+ if (Materializer) {
+ if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
+ return VM[V] = NewV;
+ }
+
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
if (isa<GlobalValue>(V) || isa<MDString>(V))
@@ -57,14 +65,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
if (OP == 0) continue;
- Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_OP == OP ||
@@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (Op == 0)
Elts.push_back(0);
else {
- Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
@@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Function *F =
- cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
@@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Value *Mapped = 0;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
- Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
if (Mapped != C) break;
}
@@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Map the rest of the operands that aren't processed yet.
for (++OpNo; OpNo != NumOperands; ++OpNo)
Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer){
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
// If we aren't ignoring missing entries, assert that something happened.
if (V != 0)
*op = V;
@@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
MDNode *Old = MI->second;
- MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
I->setMetadata(MI->first, New);
}