author     Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
committer  Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
commit     ebe69fe11e48d322045d5949c83283927a0d790b (patch)
tree       c92f1907a6b8006628a4b01615f38264d29834ea /lib/Transforms/Utils
parent     b7d2e72b02a4cb8034f32f8247a2558d2434e121 (diff)
Update aosp/master LLVM for rebase to r230699.
Change-Id: I2b5be30509658cb8266be782de0ab24f9099f9b9
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--  lib/Transforms/Utils/ASanStackFrameLayout.cpp        8
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp           2
-rw-r--r--  lib/Transforms/Utils/Android.mk                      1
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp           211
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp         63
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp             134
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt                  5
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp             158
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp                 4
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp           34
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp            136
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp                      55
-rw-r--r--  lib/Transforms/Utils/LLVMBuild.txt                   2
-rw-r--r--  lib/Transforms/Utils/Local.cpp                      85
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp               91
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp                 40
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp         104
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp      188
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp               272
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp                    11
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp    19
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp                 41
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp               600
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp            129
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp       20
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp          677
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp             33
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp      1
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp               255
29 files changed, 1899 insertions, 1480 deletions
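Most of the churn in this directory comes from one upstream interface change: the CFG utilities no longer take a Pass *P and query it for analyses, but instead accept the specific analyses (DominatorTree, LoopInfo, AliasAnalysis, MemoryDependenceAnalysis) directly, with critical-edge splitting options gathered into a CriticalEdgeSplittingOptions bundle. A minimal sketch of what a caller looks like after this patch; the wrapper function and the assumption that the caller already owns DT/LI pointers are illustrative, not part of the commit:

  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/Analysis/LoopInfo.h"

  using namespace llvm;

  // Hypothetical helper: split 'Old' before 'SplitPt' while keeping whatever
  // analyses the caller has up to date. Before this patch the call was
  // SplitBlock(Old, SplitPt, P) with a Pass pointer.
  static BasicBlock *splitAndPreserve(BasicBlock *Old, Instruction *SplitPt,
                                      DominatorTree *DT, LoopInfo *LI) {
    return SplitBlock(Old, SplitPt, DT, LI);
  }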
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index cce016a..03c3a80 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
#include <algorithm>
namespace llvm {
@@ -33,11 +34,6 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
static const size_t kMinAlignment = 16;
-static size_t RoundUpTo(size_t X, size_t RoundTo) {
- assert((RoundTo & (RoundTo - 1)) == 0);
- return (X + RoundTo - 1) & ~(RoundTo - 1);
-}
-
// The larger the variable Size the larger is the redzone.
// The resulting frame size is a multiple of Alignment.
static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
@@ -48,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
- return RoundUpTo(Res, Alignment);
+ return RoundUpToAlignment(Res, Alignment);
}
void
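The local RoundUpTo helper is deleted in favour of RoundUpToAlignment from llvm/Support/MathExtras.h, which performs the same power-of-two round-up. A quick self-contained check of the semantics (the numbers are only illustrative):

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  int main() {
    // Round a redzone-padded size up to the 16-byte minimum frame alignment,
    // exactly as the removed RoundUpTo helper did.
    assert(llvm::RoundUpToAlignment(33, 16) == 48);
    assert(llvm::RoundUpToAlignment(48, 16) == 48);
    return 0;
  }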
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index f8e5af5..820544b 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -167,7 +167,7 @@ bool AddDiscriminators::runOnFunction(Function &F) {
bool Changed = false;
Module *M = F.getParent();
LLVMContext &Ctx = M->getContext();
- DIBuilder Builder(*M);
+ DIBuilder Builder(*M, /*AllowUnresolved*/ false);
// Traverse all the blocks looking for instructions in different
// blocks that are at the same file:line location.
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index e20dc0a..4d24928 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -22,7 +22,6 @@ transforms_utils_SRC_FILES := \
LoopSimplify.cpp \
LoopUnroll.cpp \
LoopUnrollRuntime.cpp \
- LowerExpectIntrinsic.cpp \
LowerInvoke.cpp \
LowerSwitch.cpp \
Mem2Reg.cpp \
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 983f025..b455257 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -65,16 +65,10 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
- AliasAnalysis *AA = nullptr;
- MemoryDependenceAnalysis *MemDep = nullptr;
- if (P) {
- AA = P->getAnalysisIfAvailable<AliasAnalysis>();
- MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- }
-
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
@@ -113,7 +107,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
+ LoopInfo *LI, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
// Don't merge away blocks who have their address taken.
if (BB->hasAddressTaken()) return false;
@@ -149,7 +145,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
- FoldSingleEntryPHINodes(BB, P);
+ FoldSingleEntryPHINodes(BB, AA, MemDep);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -166,28 +162,23 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
PredBB->takeName(BB);
// Finally, erase the old block and update dominator info.
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- if (DomTreeNode *DTN = DT.getNode(BB)) {
- DomTreeNode *PredDTN = DT.getNode(PredBB);
- SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
- DE = Children.end(); DI != DE; ++DI)
- DT.changeImmediateDominator(*DI, PredDTN);
-
- DT.eraseNode(BB);
- }
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
+ DE = Children.end();
+ DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
- LI->removeBlock(BB);
+ if (LI)
+ LI->removeBlock(BB);
- if (MemoryDependenceAnalysis *MD =
- P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
- MD->invalidateCachedPredecessors();
- }
- }
+ if (MemDep)
+ MemDep->invalidateCachedPredecessors();
BB->eraseFromParent();
return true;
@@ -240,12 +231,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
+ LoopInfo *LI) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
+ .setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -255,23 +248,25 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, Succ->begin(), P);
+ return SplitBlock(Succ, Succ->begin(), DT, LI);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), P);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI);
}
-unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
+unsigned
+llvm::SplitAllCriticalEdges(Function &F,
+ const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (SplitCriticalEdge(TI, i, P))
+ if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
}
return NumBroken;
@@ -282,7 +277,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
/// to a new block. The two blocks are joined by an unconditional branch and
/// the loop info is updated.
///
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
++SplitIt;
@@ -290,26 +286,23 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (LI)
if (Loop *L = LI->getLoopFor(Old))
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT.getNode(Old)) {
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
I != E; ++I)
Children.push_back(*I);
- DomTreeNode *NewNode = DT.addNewBlock(New, Old);
+ DomTreeNode *NewNode = DT->addNewBlock(New, Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
- DT.changeImmediateDominator(*I, NewNode);
+ DT->changeImmediateDominator(*I, NewNode);
}
- }
return New;
}
@@ -318,45 +311,46 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
- Pass *P, bool &HasLoopExit) {
- if (!P) return;
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA, bool &HasLoopExit) {
+ // Update dominator tree if available.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // The rest of the logic is only relevant for updating the loop structures.
+ if (!LI)
+ return;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr;
+ Loop *L = LI->getLoopFor(OldBB);
// If we need to preserve loop analyses, collect some information about how
// this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- if (LI) {
- bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
- for (ArrayRef<BasicBlock*>::iterator
- i = Preds.begin(), e = Preds.end(); i != e; ++i) {
- BasicBlock *Pred = *i;
-
- // If we need to preserve LCSSA, determine if any of the preds is a loop
- // exit.
- if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Pred))
- if (!PL->contains(OldBB))
- HasLoopExit = true;
-
- // If we need to preserve LoopInfo, note whether any of the preds crosses
- // an interesting loop boundary.
- if (!L) continue;
- if (L->contains(Pred))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
+ for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end();
+ i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L)
+ continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
}
- // Update dominator tree if available.
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().splitBlock(NewBB);
-
- if (!L) return;
+ // Unless we have a loop for OldBB, nothing else to do here.
+ if (!L)
+ return;
if (IsLoopEntry) {
// Add the new block to the nearest enclosing loop (and not an adjacent
@@ -382,9 +376,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
- L->addBasicBlockToLoop(NewBB, LI->getBase());
+ L->addBasicBlockToLoop(NewBB, *LI);
if (SplitMakesNewLoopHeader)
L->moveToHeader(NewBB);
}
@@ -393,10 +387,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
/// from NewBB. This also updates AliasAnalysis, if available.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
- ArrayRef<BasicBlock*> Preds, BranchInst *BI,
- Pass *P, bool HasLoopExit) {
+ ArrayRef<BasicBlock *> Preds, BranchInst *BI,
+ AliasAnalysis *AA, bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr;
SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
@@ -461,11 +454,15 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'.
+/// SplitBlockPredecessors - This method introduces at least one new basic block
+/// into the function and moves some of the predecessors of BB to be
+/// predecessors of the new block. The new predecessors are indicated by the
+/// Preds array. The new block is given a suffix of 'Suffix'. Returns new basic
+/// block to which predecessors from Preds are now pointing.
+///
+/// If BB is a landingpad block then an additional basic block might be introduced.
+/// It will have the suffix 'Suffix'+".split-lp".
+/// See SplitLandingPadPredecessors for more details on this case.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
@@ -473,8 +470,21 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock*> Preds,
- const char *Suffix, Pass *P) {
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ // For the landingpads we need to act a bit differently.
+ // Delegate this work to the SplitLandingPadPredecessors.
+ if (BB->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ std::string NewName = std::string(Suffix) + ".split-lp";
+
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(),
+ NewBBs, AA, DT, LI, PreserveLCSSA);
+ return NewBBs[0];
+ }
+
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -505,10 +515,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit);
return NewBB;
}
@@ -526,10 +537,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
/// exits).
///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock*> Preds,
+ ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
- Pass *P,
- SmallVectorImpl<BasicBlock*> &NewBBs) {
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -552,12 +564,12 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
}
- // Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit);
// Move the remaining edges from OrigBB to point to NewBB2.
SmallVector<BasicBlock*, 8> NewBB2Preds;
@@ -589,10 +601,11 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit);
}
LandingPadInst *LPad = OrigBB->getLandingPadInst();
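With the Pass parameter gone, SplitBlockPredecessors receives the analyses to preserve explicitly, and landing-pad blocks are transparently forwarded to SplitLandingPadPredecessors (the first new block is returned). A hedged sketch of a caller; splitting off every predecessor and the non-null analysis pointers are assumptions for illustration:

  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/IR/CFG.h"
  #include "llvm/IR/Dominators.h"

  using namespace llvm;

  static BasicBlock *splitAllPreds(BasicBlock *BB, AliasAnalysis *AA,
                                   DominatorTree *DT, LoopInfo *LI) {
    SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
    // New form: analyses are passed directly; the PreserveLCSSA flag replaces
    // the old mustPreserveAnalysisID(LCSSAID) query on the Pass.
    return SplitBlockPredecessors(BB, Preds, ".split", AA, DT, LI,
                                  /*PreserveLCSSA=*/false);
  }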
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index eda22cf..7e83c9e 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -18,6 +18,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
@@ -41,14 +42,19 @@ namespace {
}
bool runOnFunction(Function &F) override {
- unsigned N = SplitAllCriticalEdges(F, this);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N =
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
NumBroken += N;
return N > 0;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -125,10 +131,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- Pass *P, bool MergeIdenticalEdges,
- bool DontDeleteUselessPhis,
- bool SplitLandingPads) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr;
+ const CriticalEdgeSplittingOptions &Options) {
+ if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
+ return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
@@ -179,29 +184,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
- if (MergeIdenticalEdges) {
+ if (Options.MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+ DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
-
-
- // If we don't have a pass object, we can't update anything...
- if (!P) return NewBB;
-
- DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
-
// If we have nothing to update, just return.
+ auto *AA = Options.AA;
+ auto *DT = Options.DT;
+ auto *LI = Options.LI;
if (!DT && !LI)
return NewBB;
@@ -268,13 +266,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
if (TIL == DestLoop) {
// Both in the same loop, the NewBB joins loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ TIL->addBasicBlockToLoop(NewBB, *LI);
} else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
// Edge from two loops with no containment relation. Because these
// are natural loops, we know that the destination block must be the
@@ -283,19 +281,20 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
assert(DestLoop->getHeader() == DestBB &&
"Should not create irreducible loops!");
if (Loop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NewBB, LI->getBase());
+ P->addBasicBlockToLoop(NewBB, *LI);
}
}
+
// If TIBB is in a loop and DestBB is outside of that loop, we may need
// to update LoopSimplify form and LCSSA form.
- if (!TIL->contains(DestBB) &&
- P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ if (!TIL->contains(DestBB)) {
assert(!TIL->contains(NewBB) &&
"Split point for loop exit is contained in loop!");
// Update LCSSA form in the newly created exit block.
- if (P->mustPreserveAnalysisID(LCSSAID))
+ if (Options.PreserveLCSSA) {
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+ }
// The only way that we can break LoopSimplify form by splitting a critical
// edge is if after the split there exists some edge from TIL to DestBB
@@ -322,20 +321,12 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isLandingPad() &&
"We don't split edges to landing pads!");
- BasicBlock *NewExitBB =
- SplitBlockPredecessors(DestBB, LoopPreds, "split", P);
- if (P->mustPreserveAnalysisID(LCSSAID))
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
- // LCSSA form was updated above for the case where LoopSimplify is
- // available, which means that all predecessors of loop exit blocks
- // are within the loop. Without LoopSimplify form, it would be
- // necessary to insert a new phi.
- assert((!P->mustPreserveAnalysisID(LCSSAID) ||
- P->mustPreserveAnalysisID(LoopSimplifyID)) &&
- "SplitCriticalEdge doesn't know how to update LCCSA form "
- "without LoopSimplify!");
}
}
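SplitCriticalEdge and SplitAllCriticalEdges now take a CriticalEdgeSplittingOptions bundle in place of the Pass pointer and loose boolean flags. A minimal sketch that reuses exactly the construction the patch itself uses in SplitEdge; the surrounding function is illustrative:

  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  static unsigned breakAllCriticalEdges(Function &F, DominatorTree *DT,
                                        LoopInfo *LI) {
    // The options object carries the analyses to update plus the former
    // flags (MergeIdenticalEdges, DontDeleteUselessPHIs, PreserveLCSSA).
    return SplitAllCriticalEdges(
        F, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
  }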
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 112d26c..762a83f 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -486,135 +486,3 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
-
-SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
-
-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
- // We really need DataLayout for later.
- if (!TD) return false;
-
- this->CI = CI;
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
- IRBuilder<> B(CI);
-
- if (Name == "__memcpy_chk") {
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
- return false;
-
- if (isFoldable(3, 2, false)) {
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- replaceCall(CI->getArgOperand(0));
- return true;
- }
- return false;
- }
-
- // Should be similar to memcpy.
- if (Name == "__mempcpy_chk") {
- return false;
- }
-
- if (Name == "__memmove_chk") {
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
- return false;
-
- if (isFoldable(3, 2, false)) {
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- replaceCall(CI->getArgOperand(0));
- return true;
- }
- return false;
- }
-
- if (Name == "__memset_chk") {
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
- return false;
-
- if (isFoldable(3, 2, false)) {
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
- false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- replaceCall(CI->getArgOperand(0));
- return true;
- }
- return false;
- }
-
- if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
- // Check if this has the right signature.
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
- return 0;
-
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
- // st[rp]cpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- TLI, Name.substr(2, 6));
- if (!Ret)
- return false;
- replaceCall(Ret);
- return true;
- }
- return false;
- }
-
- if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- !FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
- return false;
-
- if (isFoldable(3, 2, false)) {
- Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, TLI,
- Name.substr(2, 7));
- if (!Ret)
- return false;
- replaceCall(Ret);
- return true;
- }
- return false;
- }
-
- if (Name == "__strcat_chk") {
- return false;
- }
-
- if (Name == "__strncat_chk") {
- return false;
- }
-
- return false;
-}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6ce22b1..01e811f 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_library(LLVMTransformUtils
LoopSimplify.cpp
LoopUnroll.cpp
LoopUnrollRuntime.cpp
- LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
@@ -37,6 +36,10 @@ add_llvm_library(LLVMTransformUtils
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Utils
)
add_dependencies(LLVMTransformUtils intrinsics_gen)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 5c8f20d..09279b6 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -164,14 +164,13 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
// Add an operand to an existing MDNode. The new operand will be added at the
// back of the operand list.
-static void AddOperand(MDNode *Node, Value *Operand) {
- SmallVector<Value*, 16> Operands;
- for (unsigned i = 0; i < Node->getNumOperands(); i++) {
- Operands.push_back(Node->getOperand(i));
- }
- Operands.push_back(Operand);
- MDNode *NewNode = MDNode::get(Node->getContext(), Operands);
- Node->replaceAllUsesWith(NewNode);
+static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) {
+ SmallVector<Metadata *, 16> NewSPs;
+ NewSPs.reserve(SPs->getNumOperands() + 1);
+ for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I)
+ NewSPs.push_back(SPs->getOperand(I));
+ NewSPs.push_back(NewSP);
+ CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs)));
}
// Clone the module-level debug info associated with OldFunc. The cloned data
@@ -187,7 +186,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
// Ensure that OldFunc appears in the map.
// (if it's already there it must point to NewFunc anyway)
VMap[OldFunc] = NewFunc;
- DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap));
+ DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap));
for (DICompileUnit CU : Finder.compile_units()) {
DIArray Subprograms(CU.getSubprograms());
@@ -196,7 +195,8 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
// also contain the new one.
for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
- AddOperand(Subprograms, NewSubprogram);
+ AddOperand(CU, Subprograms, NewSubprogram);
+ break;
}
}
}
@@ -260,21 +260,36 @@ namespace {
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const DataLayout *DL;
+ CloningDirector *Director;
+ ValueMapTypeRemapper *TypeMapper;
+ ValueMaterializer *Materializer;
+
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
- const DataLayout *DL)
+ const DataLayout *DL,
+ CloningDirector *Director)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL),
+ Director(Director) {
+ // These are optional components. The Director may return null.
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ } else {
+ TypeMapper = nullptr;
+ Materializer = nullptr;
+ }
}
/// CloneBlock - The specified block is found to be reachable, clone it and
/// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
+ void CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
}
@@ -282,6 +297,7 @@ namespace {
/// CloneBlock - The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
WeakVH &BBEntry = VMap[BB];
@@ -307,21 +323,39 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
const_cast<BasicBlock*>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
-
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
-
+
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
- for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
+ // If the "Director" remaps the instruction, don't clone it.
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, II, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ // If the cloning director says skip, continue to the next instruction.
+ // In this case, the cloning director is responsible for mapping the
+ // skipped instruction to some value that is defined in the new
+ // basic block.
+ if (Action == CloningDirector::SkipInstruction)
+ continue;
+ }
+
Instruction *NewInst = II->clone();
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
@@ -354,6 +388,18 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, OldTI, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ assert(Action != CloningDirector::SkipInstruction &&
+ "SkipInstruction is not valid for terminators.");
+ }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -409,39 +455,55 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
}
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
-/// except that it does some simple constant prop and DCE on the fly. The
-/// effect of this is to copy significantly less code in cases where (for
-/// example) a function call with constant arguments is inlined, and those
-/// constant arguments cause a significant amount of code in the callee to be
-/// dead. Since this doesn't produce an exact copy of the input, it can't be
-/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+/// CloneAndPruneIntoFromInst - This works like CloneAndPruneFunctionInto, except
+/// that it does not clone the entire function. Instead it starts at an
+/// instruction provided by the caller and copies (and prunes) only the code
+/// reachable from that instruction.
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+ const Instruction *StartingInst,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
const DataLayout *DL,
- Instruction *TheCall) {
+ CloningDirector *Director) {
assert(NameSuffix && "NameSuffix cannot be null!");
-
+
+ ValueMapTypeRemapper *TypeMapper = nullptr;
+ ValueMaterializer *Materializer = nullptr;
+
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ }
+
#ifndef NDEBUG
- for (Function::const_arg_iterator II = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); II != E; ++II)
- assert(VMap.count(II) && "No mapping from source argument specified!");
+  // If the cloning starts at the beginning of the function, verify that
+ // the function arguments are mapped.
+ if (!StartingInst)
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, DL);
+ NameSuffix, CodeInfo, DL, Director);
+ const BasicBlock *StartingBB;
+ if (StartingInst)
+ StartingBB = StartingInst->getParent();
+ else {
+ StartingBB = &OldFunc->getEntryBlock();
+ StartingInst = StartingBB->begin();
+ }
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
- PFC.CloneBlock(BB, CloneWorklist);
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
// Loop over all of the basic blocks in the old function. If the block was
@@ -470,7 +532,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
RemapInstruction(NewBB->getTerminator(), VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -569,7 +632,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// and zap unconditional fall-through branches. This happen all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
@@ -620,9 +683,30 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
Returns.push_back(RI);
}
+
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const DataLayout *DL,
+ Instruction *TheCall) {
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(),
+ VMap, ModuleLevelChanges, Returns, NameSuffix,
+ CodeInfo, DL, nullptr);
+}
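CloneAndPruneFunctionInto is now a thin wrapper around the new CloneAndPruneIntoFromInst, which can start cloning at an arbitrary instruction and can defer per-instruction decisions to a CloningDirector. A hedged sketch of the new entry point with no director; the helper name and the caller-prepared VMap are assumptions:

  #include "llvm/Transforms/Utils/Cloning.h"
  #include "llvm/Transforms/Utils/ValueMapper.h"
  #include "llvm/ADT/SmallVector.h"

  using namespace llvm;

  static void cloneFrom(Function *NewF, const Function *OldF,
                        const Instruction *Start, ValueToValueMapTy &VMap,
                        const DataLayout *DL) {
    SmallVector<ReturnInst *, 4> Returns;
    // Starts at 'Start' rather than the entry block; passing nullptr for the
    // CloningDirector reproduces the old CloneAndPruneFunctionInto behaviour.
    CloneAndPruneIntoFromInst(NewF, OldF, Start, VMap,
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              /*CodeInfo=*/nullptr, DL, /*Director=*/nullptr);
  }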
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index d078c96..fae9ff5 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -109,7 +109,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
if (const Constant *C = I->getAliasee())
- GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap)));
+ GA->setAliasee(MapValue(C, VMap));
}
// And named metadata....
@@ -118,7 +118,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
}
return New;
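With the Value/Metadata split, operands of named metadata are Metadata rather than Values, so CloneModule remaps them with MapMetadata instead of MapValue. A minimal sketch of the same pattern in isolation (the helper name is illustrative):

  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/Utils/ValueMapper.h"

  using namespace llvm;

  static void cloneNamedMD(const NamedMDNode &Src, Module &Dest,
                           ValueToValueMapTy &VMap) {
    NamedMDNode *New = Dest.getOrInsertNamedMetadata(Src.getName());
    for (unsigned i = 0, e = Src.getNumOperands(); i != e; ++i)
      // MDNode operands now go through the metadata mapper.
      New->addOperand(MapMetadata(Src.getOperand(i), VMap));
  }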
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 9972b22..003da58 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,6 +39,19 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
F->getEntryBlock().begin());
}
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and create a basic block
+ // into which the store can be inserted.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ if (!II->getNormalDest()->getSinglePredecessor()) {
+ unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
+ assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ (void)BB;
+ }
+ }
+
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.user_back());
@@ -71,7 +84,6 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
}
}
-
// Insert stores of the computed value into the stack slot. We have to be
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
@@ -79,27 +91,13 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
if (!isa<TerminatorInst>(I)) {
InsertPt = &I;
++InsertPt;
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
- if (II.getNormalDest()->getSinglePredecessor())
- InsertPt = II.getNormalDest()->getFirstInsertionPt();
- else {
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical. Therefore, split the critical edge and insert the store
- // in the newly created basic block.
- unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
- TerminatorInst *TI = &cast<TerminatorInst>(I);
- assert (isCriticalEdge(TI, SuccNum) &&
- "Expected a critical edge!");
- BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
- assert (BB && "Unable to split critical edge.");
- InsertPt = BB->getFirstInsertionPt();
- }
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before PHI nodes or landingpad instrs.
-
new StoreInst(&I, Slot, InsertPt);
return Slot;
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 2d0b7dc..c2ef1ac 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -18,7 +18,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -308,7 +309,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// Walk the existing metadata, adding the complete (perhaps cyclic) chain to
// the set.
- SmallVector<const Value *, 16> Queue(MD.begin(), MD.end());
+ SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
while (!Queue.empty()) {
const MDNode *M = cast<MDNode>(Queue.pop_back_val());
for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
@@ -319,13 +320,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// Now we have a complete set of all metadata in the chains used to specify
// the noalias scopes and the lists of those scopes.
- SmallVector<MDNode *, 16> DummyNodes;
- DenseMap<const MDNode *, TrackingVH<MDNode> > MDMap;
+ SmallVector<TempMDTuple, 16> DummyNodes;
+ DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
I != IE; ++I) {
- MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None);
- DummyNodes.push_back(Dummy);
- MDMap[*I] = Dummy;
+ DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ MDMap[*I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
@@ -333,17 +333,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// node.
for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
I != IE; ++I) {
- SmallVector<Value *, 4> NewOps;
+ SmallVector<Metadata *, 4> NewOps;
for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) {
- const Value *V = (*I)->getOperand(i);
+ const Metadata *V = (*I)->getOperand(i);
if (const MDNode *M = dyn_cast<MDNode>(V))
NewOps.push_back(MDMap[M]);
else
- NewOps.push_back(const_cast<Value *>(V));
+ NewOps.push_back(const_cast<Metadata *>(V));
}
- MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps),
- *TempM = MDMap[*I];
+ MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
+ MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+ assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
}
@@ -388,10 +389,6 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
NI->setMetadata(LLVMContext::MD_noalias, M);
}
}
-
- // Now that everything has been replaced, delete the dummy nodes.
- for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i)
- MDNode::deleteTemporary(DummyNodes[i]);
}
/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then
@@ -516,7 +513,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
// need to go through several PHIs to see it, and thus could be
// repeated in the Objects list.
SmallPtrSet<const Value *, 4> ObjSet;
- SmallVector<Value *, 4> Scopes, NoAliases;
+ SmallVector<Metadata *, 4> Scopes, NoAliases;
SmallSetVector<const Argument *, 4> NAPtrArgs;
for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
@@ -633,9 +630,10 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
DominatorTree DT;
bool DTCalculated = false;
- const Function *CalledFunc = CS.getCalledFunction();
- for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end(); I != E; ++I) {
+ Function *CalledFunc = CS.getCalledFunction();
+ for (Function::arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end();
+ I != E; ++I) {
unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
if (!DTCalculated) {
@@ -647,8 +645,9 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, IFI.DL, IFI.AT, CS.getInstruction(),
- &DT) >= Align)
+ if (getKnownAlignment(Arg, IFI.DL,
+ &IFI.ACT->getAssumptionCache(*CalledFunc),
+ CS.getInstruction(), &DT) >= Align)
continue;
IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
@@ -748,6 +747,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
PointerType *ArgTy = cast<PointerType>(Arg->getType());
Type *AggTy = ArgTy->getElementType();
+ Function *Caller = TheCall->getParent()->getParent();
+
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
// temporary.
@@ -760,8 +761,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
- IFI.DL, IFI.AT, TheCall) >= ByValAlignment)
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL,
+ &IFI.ACT->getAssumptionCache(*Caller),
+ TheCall) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -778,8 +780,6 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// pointer inside the callee).
Align = std::max(Align, ByValAlignment);
- Function *Caller = TheCall->getParent()->getParent();
-
Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
@@ -824,20 +824,42 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
-/// recursively update InlinedAtEntry of a DebugLoc.
-static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
- const DebugLoc &InlinedAtDL,
- LLVMContext &Ctx) {
- if (MDNode *IA = DL.getInlinedAt(Ctx)) {
- DebugLoc NewInlinedAtDL
- = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- NewInlinedAtDL.getAsMDNode(Ctx));
+/// Rebuild the entire inlined-at chain for this instruction so that the top of
+/// the chain now is inlined-at the new call site.
+static DebugLoc
+updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
+ LLVMContext &Ctx,
+ DenseMap<const MDLocation *, MDLocation *> &IANodes) {
+ SmallVector<MDLocation*, 3> InlinedAtLocations;
+ MDLocation *Last = InlinedAtNode;
+ DebugLoc CurInlinedAt = DL;
+
+ // Gather all the inlined-at nodes
+ while (MDLocation *IA =
+ cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) {
+ // Skip any we've already built nodes for
+ if (MDLocation *Found = IANodes[IA]) {
+ Last = Found;
+ break;
+ }
+
+ InlinedAtLocations.push_back(IA);
+ CurInlinedAt = DebugLoc::getFromDILocation(IA);
+ }
+
+ // Starting from the top, rebuild the nodes to point to the new inlined-at
+ // location (then rebuilding the rest of the chain behind it) and update the
+ // map of already-constructed inlined-at nodes.
+ for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend();
+ I != E; ++I) {
+ const MDLocation *MD = *I;
+ Last = IANodes[MD] = MDLocation::getDistinct(
+ Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
}
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- InlinedAtDL.getAsMDNode(Ctx));
+ // And finally create the normal location for this instruction, referring to
+ // the new inlined-at chain.
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last);
}
/// fixupLineNumbers - Update inlined instructions' line numbers to
@@ -848,6 +870,20 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (TheCallDL.isUnknown())
return;
+ auto &Ctx = Fn->getContext();
+ auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx));
+
+ // Create a unique call site, not to be confused with any other call from the
+ // same location.
+ InlinedAtNode = MDLocation::getDistinct(
+ Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
+ InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
+
+ // Cache the inlined-at nodes as they're built so they are reused, without
+ // this every instruction's inlined-at chain would become distinct from each
+ // other.
+ DenseMap<const MDLocation *, MDLocation *> IANodes;
+
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
@@ -865,12 +901,19 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setDebugLoc(TheCallDL);
} else {
- BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
LLVMContext &Ctx = BI->getContext();
MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
- InlinedAt, Ctx));
+ DVI->setOperand(2, MetadataAsValue::get(
+ Ctx, createInlinedVariable(DVI->getVariable(),
+ InlinedAt, Ctx)));
+ } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ LLVMContext &Ctx = BI->getContext();
+ MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
+ DDI->setOperand(1, MetadataAsValue::get(
+ Ctx, createInlinedVariable(DDI->getVariable(),
+ InlinedAt, Ctx)));
}
}
}
@@ -1026,8 +1069,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
- if (IFI.AT)
- IFI.AT->forgetCachedAssumptions(Caller);
+ if (IFI.ACT)
+ IFI.ACT->getAssumptionCache(*Caller).clear();
}
// If there are any alloca instructions in the block that used to be the entry
@@ -1069,6 +1112,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
FirstNewBlock->getInstList(),
AI, I);
}
+ // Move any dbg.declares describing the allocas into the entry basic block.
+ DIBuilder DIB(*Caller->getParent());
+ for (auto &AI : IFI.StaticAllocas)
+ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
}
bool InlinedMustTailCalls = false;
@@ -1398,7 +1445,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, IFI.AT)) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr,
+ &IFI.ACT->getAssumptionCache(*Caller))) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 51a3d9c..1cba367 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -61,7 +61,7 @@ static bool isExitBlock(BasicBlock *BB,
/// uses.
static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
const SmallVectorImpl<BasicBlock *> &ExitBlocks,
- PredIteratorCache &PredCache) {
+ PredIteratorCache &PredCache, LoopInfo *LI) {
SmallVector<Use *, 16> UsesToRewrite;
BasicBlock *InstBB = Inst.getParent();
@@ -94,6 +94,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
DomTreeNode *DomNode = DT.getNode(DomBB);
SmallVector<PHINode *, 16> AddedPHIs;
+ SmallVector<PHINode *, 8> PostProcessPHIs;
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(Inst.getType(), Inst.getName());
@@ -131,6 +132,18 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
+
+ // LoopSimplify might fail to simplify some loops (e.g. when indirect
+ // branches are involved). In such situations, it might happen that an exit
+ // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
+ // PHIs in such an exit block, we are also inserting PHIs into L2's header.
+ // This could break LCSSA form for L2 because these inserted PHIs can also
+  // have uses outside of L2. Remember all such PHIs so we can revisit them
+  // later. FIXME: Remove this once indirectbr support in LoopSimplify is
+  // improved.
+ if (auto *OtherLoop = LI->getLoopFor(ExitBB))
+ if (!L.contains(OtherLoop))
+ PostProcessPHIs.push_back(PN);
}
// Rewrite all uses outside the loop in terms of the new PHIs we just
@@ -157,6 +170,25 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
+  // Post-process PHI instructions that were inserted into another, disjoint
+  // loop and update their exits properly.
+ for (auto *I : PostProcessPHIs) {
+ if (I->use_empty())
+ continue;
+
+ BasicBlock *PHIBB = I->getParent();
+ Loop *OtherLoop = LI->getLoopFor(PHIBB);
+ SmallVector<BasicBlock *, 8> EBs;
+ OtherLoop->getExitBlocks(EBs);
+ if (EBs.empty())
+ continue;
+
+ // Recurse and re-process each PHI instruction. FIXME: we should really
+ // convert this entire thing to a worklist approach where we process a
+ // vector of instructions...
+ processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI);
+ }
+
// Remove PHI nodes that did not have any uses rewritten.
for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
if (AddedPHIs[i]->use_empty())
@@ -180,7 +212,8 @@ blockDominatesAnExit(BasicBlock *BB,
return false;
}
-bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
+bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE) {
bool Changed = false;
// Get the set of exiting blocks.
@@ -212,7 +245,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
!isa<PHINode>(I->user_back())))
continue;
- Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache);
+ Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI);
}
}
@@ -228,15 +261,15 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
}
/// Process a loop nest depth first.
-bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT,
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
// Recurse depth-first through inner loops.
- for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- Changed |= formLCSSARecursively(**LI, DT, SE);
+ for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I)
+ Changed |= formLCSSARecursively(**I, DT, LI, SE);
- Changed |= formLCSSA(L, DT, SE);
+ Changed |= formLCSSA(L, DT, LI, SE);
return Changed;
}
@@ -261,7 +294,7 @@ struct LCSSA : public FunctionPass {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
@@ -275,7 +308,7 @@ private:
char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
@@ -285,13 +318,13 @@ char &llvm::LCSSAID = LCSSA::ID;
/// Process all loops in the function, inner-most out.
bool LCSSA::runOnFunction(Function &F) {
bool Changed = false;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= formLCSSARecursively(**I, *DT, SE);
+ Changed |= formLCSSARecursively(**I, *DT, LI, SE);
return Changed;
}
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
index 88b2ffe..6b2d405 100644
--- a/lib/Transforms/Utils/LLVMBuild.txt
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = TransformUtils
parent = Transforms
-required_libraries = Analysis Core IPA Support Target
+required_libraries = Analysis Core IPA Support
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index c963c51..4830568 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -110,11 +111,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
- // If we are switching on a constant, we can convert the switch into a
- // single branch instruction!
+ // If we are switching on a constant, we can convert the switch to an
+ // unconditional branch.
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getDefaultDest();
- BasicBlock *DefaultDest = TheOnlyDest;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ BasicBlock *TheOnlyDest = DefaultDest;
+
+ // If the default is unreachable, ignore it when searching for TheOnlyDest.
+ if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
+ SI->getNumCases() > 0) {
+ TheOnlyDest = SI->case_begin().getCaseSuccessor();
+ }
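The effect of seeding TheOnlyDest this way can be sketched on plain data. The sketch below is an invented, minimal analogue (Case, allReachableDestsAgree and the string labels are assumptions for the example, not LLVM IR): with an unreachable default, only the case successors need to agree for the switch to fold into an unconditional branch.

// Minimal sketch on plain data: an unreachable default is ignored when testing
// whether every reachable successor of a switch is the same block.
#include <string>
#include <vector>

struct Case {
  int Value;
  std::string Dest;
};

bool allReachableDestsAgree(const std::vector<Case> &Cases,
                            const std::string &DefaultDest,
                            bool DefaultIsUnreachable,
                            std::string &TheOnlyDest) {
  // Seed with the first case successor instead of an unreachable default.
  TheOnlyDest = (DefaultIsUnreachable && !Cases.empty()) ? Cases.front().Dest
                                                         : DefaultDest;
  for (const Case &C : Cases)
    if (C.Dest != TheOnlyDest)
      return false; // two distinct reachable destinations: cannot fold
  return true;
}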
// Figure out which case it goes to.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
@@ -137,7 +144,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SmallVector<uint32_t, 8> Weights;
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
++MD_i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(MD_i));
assert(CI);
Weights.push_back(CI->getValue().getZExtValue());
}
@@ -208,8 +216,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->getDefaultDest());
MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
if (MD && MD->getNumOperands() == 3) {
- ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
- ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ ConstantInt *SICase =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
assert(SICase && SIDef);
// The TrueWeight should be the weight for the single case of SI.
NewBr->setMetadata(LLVMContext::MD_prof,
@@ -486,7 +496,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
@@ -522,14 +532,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
if (PredBB == &DestBB->getParent()->getEntryBlock())
DestBB->moveAfter(PredBB);
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock();
- DT.changeImmediateDominator(DestBB, PredBBIDom);
- DT.eraseNode(PredBB);
- }
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -940,7 +946,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout *DL,
- AssumptionTracker *AT,
+ AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
@@ -948,7 +954,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL, 0, AT, CxtI, DT);
+ computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
unsigned TrailZ = KnownZero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
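The hunk above feeds computeKnownBits into the usual trick of turning trailing known-zero bits into an alignment guarantee. A minimal self-contained illustration follows; the function name and the cap value 29 are assumptions made for the example rather than LLVM's exact code.

// Sketch: derive a conservative alignment from bits known to be zero.
#include <algorithm>
#include <cstdint>

unsigned alignmentFromKnownZero(uint64_t KnownZero) {
  // Count how many low bits of the pointer are known to be zero.
  unsigned TrailZ = 0;
  while (TrailZ < 64 && (KnownZero & (1ULL << TrailZ)))
    ++TrailZ;
  // Cap the shift so the result still fits in a 32-bit unsigned value.
  TrailZ = std::min(TrailZ, 29u);
  return 1u << TrailZ; // the pointer is at least this aligned
}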
@@ -1048,7 +1054,7 @@ static bool isArray(AllocaInst *AI) {
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
bool llvm::LowerDbgDeclare(Function &F) {
- DIBuilder DIB(*F.getParent());
+ DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
SmallVector<DbgDeclareInst *, 4> Dbgs;
for (auto &FI : F)
for (BasicBlock::iterator BI : FI)
@@ -1091,19 +1097,21 @@ bool llvm::LowerDbgDeclare(Function &F) {
/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
/// alloca 'V', if any.
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
- if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
- for (User *U : DebugNode->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- return DDI;
+ if (auto *L = LocalAsMetadata::getIfExists(V))
+ if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ return DDI;
return nullptr;
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder) {
+ DIBuilder &Builder, bool Deref) {
DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
if (!DDI)
return false;
+ DebugLoc Loc = DDI->getDebugLoc();
DIVariable DIVar(DDI->getVariable());
DIExpression DIExpr(DDI->getExpression());
assert((!DIVar || DIVar.isVariable()) &&
@@ -1111,23 +1119,24 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
if (!DIVar)
return false;
- // Create a copy of the original DIDescriptor for user variable, appending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<int64_t, 4> NewDIExpr;
- if (DIExpr) {
- for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) {
- NewDIExpr.push_back(DIExpr.getElement(i));
- }
+ if (Deref) {
+    // Create a copy of the original DIDescriptor for the user variable,
+    // prepending a "deref" operation to the list of address elements, since
+    // the new llvm.dbg.declare will take a value storing the address of the
+    // variable's memory rather than the alloca itself.
+ SmallVector<uint64_t, 4> NewDIExpr;
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (DIExpr)
+ for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
+ NewDIExpr.push_back(DIExpr.getElement(i));
+ DIExpr = Builder.createExpression(NewDIExpr);
}
- NewDIExpr.push_back(dwarf::DW_OP_deref);
// Insert llvm.dbg.declare in the same basic block as the original alloca,
// and remove old llvm.dbg.declare.
BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar,
- Builder.createExpression(NewDIExpr), BB);
+ Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB)
+ ->setDebugLoc(Loc);
DDI->eraseFromParent();
return true;
}
@@ -1252,7 +1261,7 @@ static bool markAliveBlocks(BasicBlock *BB,
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
Changed = true;
- } else if (II->doesNotThrow()) {
+ } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
BranchInst::Create(II->getNormalDest(), II);
@@ -1326,6 +1335,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
break;
case LLVMContext::MD_alias_scope:
+ K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
+ break;
case LLVMContext::MD_noalias:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index af0501f..a0f8268 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -44,7 +44,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -113,6 +113,14 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
+ // Get analyses that we try to update.
+ auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>();
+ auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -131,15 +139,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
- if (!Header->isLandingPad()) {
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- PP);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", PP, NewBBs);
- PreheaderBB = NewBBs[0];
- }
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ AA, DT, LI, PreserveLCSSA);
PreheaderBB->getTerminator()->setDebugLoc(
Header->getFirstNonPHI()->getDebugLoc());
@@ -157,7 +158,9 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
///
/// This method is used to split exit blocks that have predecessors outside of
/// the loop.
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
+static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, Pass *PP) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -172,15 +175,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewExitBB = nullptr;
- if (Exit->isLandingPad()) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, LoopBlocks,
- ".loopexit", ".nonloopexit",
- PP, NewBBs);
- NewExitBB = NewBBs[0];
- } else {
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
- }
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT,
+ LI, PreserveLCSSA);
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
<< NewExitBB->getName() << "\n");
@@ -210,11 +208,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// us how to partition the loops.
static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
DominatorTree *DT,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -254,7 +252,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
AliasAnalysis *AA, DominatorTree *DT,
LoopInfo *LI, ScalarEvolution *SE, Pass *PP,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
@@ -263,7 +261,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
- PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AT);
+ PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC);
if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -287,9 +285,11 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (SE)
SE->forgetLoop(L);
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+ AA, DT, LI, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -460,7 +460,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update Loop Information - we know that this block is now in the current
// loop and all parent loops.
- L->addBasicBlockToLoop(BEBlock, LI->getBase());
+ L->addBasicBlockToLoop(BEBlock, *LI);
// Update dominator information
DT->splitBlock(BEBlock);
@@ -476,8 +476,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP,
- const DataLayout *DL, AssumptionTracker *AT) {
+ ScalarEvolution *SE, Pass *PP, const DataLayout *DL,
+ AssumptionCache *AC) {
bool Changed = false;
ReprocessLoop:
@@ -567,7 +567,7 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
++NumInserted;
Changed = true;
}
@@ -583,8 +583,8 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE,
- PP, AT)) {
+ if (Loop *OuterL =
+ separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -614,7 +614,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -714,7 +714,7 @@ ReprocessLoop:
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
AliasAnalysis *AA, ScalarEvolution *SE,
- const DataLayout *DL, AssumptionTracker *AT) {
+ const DataLayout *DL, AssumptionCache *AC) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -726,13 +726,12 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
// order. We can use this simple process because loops form a tree.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
Loop *L2 = Worklist[Idx];
- for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
- Worklist.push_back(*I);
+ Worklist.append(L2->begin(), L2->end());
}
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, DL, AT);
+ SE, PP, DL, AC);
return Changed;
}
@@ -751,19 +750,19 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
const DataLayout *DL;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionTracker>();
+ AU.addRequired<AssumptionCacheTracker>();
// We need loop information to identify the loops...
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
@@ -779,9 +778,9 @@ namespace {
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -795,16 +794,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
AA = getAnalysisIfAvailable<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- AT = &getAnalysis<AssumptionTracker>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AT);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC);
return Changed;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 0e1baa1..accb731 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -19,7 +19,7 @@
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -154,9 +154,8 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- bool AllowRuntime, unsigned TripMultiple,
- LoopInfo *LI, Pass *PP, LPPassManager *LPM,
- AssumptionTracker *AT) {
+ bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
+ Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -312,7 +311,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Tell LI about New.
if (*BB == Header) {
assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
} else {
// Figure out which loop New is in.
const Loop *OldLoop = LI->getLoopFor(*BB);
@@ -334,7 +333,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (SE)
SE->forgetLoop(OldLoop);
}
- NewLoop->addBasicBlockToLoop(New, LI->getBase());
+ NewLoop->addBasicBlockToLoop(New, *LI);
}
if (*BB == Header)
@@ -473,7 +472,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
- AT->forgetCachedAssumptions(F);
+ AC->clear();
DominatorTree *DT = nullptr;
if (PP) {
@@ -534,7 +533,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (OuterL) {
DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AT);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
@@ -544,9 +543,32 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
while (OuterL->getParentLoop() != LatchLoop)
OuterL = OuterL->getParentLoop();
- formLCSSARecursively(*OuterL, *DT, SE);
+ formLCSSARecursively(*OuterL, *DT, LI, SE);
}
}
return true;
}
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
+MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+ return nullptr;
+}
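For readers unfamiliar with llvm.loop metadata, the lookup above amounts to a linear scan that skips the self-referential operand 0. The sketch below is a standalone analogue on plain data (Node, findLoopHint and the field names are invented for the example, not the MDNode API).

// Standalone analogue of the hint lookup: operand 0 is the list's
// self-reference, every later operand may carry a string name.
#include <string>
#include <vector>

struct Node {
  std::string Name;         // empty if this operand has no name string
  std::vector<int> Payload; // stand-in for the operand's remaining contents
};

const Node *findLoopHint(const std::vector<Node> &LoopID,
                         const std::string &Name) {
  // Skip index 0: by convention it refers to the loop id node itself.
  for (size_t i = 1; i < LoopID.size(); ++i)
    if (LoopID[i].Name == Name)
      return &LoopID[i];
  return nullptr;
}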
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 3d91336..91b688c 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,14 +23,17 @@
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
@@ -55,10 +58,11 @@ STATISTIC(NumRuntimeUnrolled,
/// - Branch around the original loop if the trip count is less
/// than the unroll factor.
///
-static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -105,23 +109,25 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
}
}
- // Create a branch around the orignal loop, which is taken if the
- // trip count is less than the unroll factor.
+  // Create a branch around the original loop, which is taken if there are no
+ // iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologEnd->getTerminator();
+
+ assert(Count != 0 && "nonsensical Count!");
+
+ // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
+  // (since Count is a power of 2). This means %xtraiter is (BECount + 1)
+ // and all of the iterations of this loop were executed by the prologue. Note
+ // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
Instruction *BrLoopExit =
- new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
- ConstantInt::get(TripCount->getType(), Count));
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
+ ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
- if (!Exit->isLandingPad()) {
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
- P, NewBBs);
- }
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
+ P->mustPreserveAnalysisID(LCSSAID));
// Add the branch to the exit block (around the unrolled loop)
BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
InsertPt->eraseFromParent();
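A quick worked instance of the new comparison, with values invented for illustration: when BECount is unsigned-less-than Count - 1, the prologue already covers every iteration, so the branch around the unrolled loop is taken.

// Worked example (illustrative values only).
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Count = 4;   // unroll factor, a power of two
  const uint32_t BECount = 2; // backedge-taken count, i.e. 3 iterations total
  const uint32_t XtraIter = (BECount + 1) & (Count - 1); // prologue iterations

  // BECount <u (Count - 1), so (BECount + 1) & (Count - 1) == BECount + 1:
  // all three iterations run in the prologue and the unrolled loop is skipped.
  assert(BECount < Count - 1);
  assert(XtraIter == BECount + 1);
  return 0;
}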
@@ -160,9 +166,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
NewBlocks.push_back(NewBB);
if (NewLoop)
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
else if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
if (Header == *BB) {
@@ -217,9 +223,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
}
if (NewLoop) {
// Add unroll disable metadata to disable future unrolling for this loop.
- SmallVector<Value *, 4> Vals;
+ SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
- Vals.push_back(nullptr);
+ MDs.push_back(nullptr);
MDNode *LoopID = NewLoop->getLoopID();
if (LoopID) {
// First remove any existing loop unrolling metadata.
@@ -230,17 +236,18 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
}
- if (!IsUnrollMetadata) Vals.push_back(LoopID->getOperand(i));
+ if (!IsUnrollMetadata)
+ MDs.push_back(LoopID->getOperand(i));
}
}
LLVMContext &Context = NewLoop->getHeader()->getContext();
- SmallVector<Value *, 1> DisableOperands;
+ SmallVector<Metadata *, 1> DisableOperands;
DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- Vals.push_back(DisableNode);
+ MDs.push_back(DisableNode);
- MDNode *NewLoopID = MDNode::get(Context, Vals);
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
// Set operand 0 to refer to the loop id itself.
NewLoopID->replaceOperandWith(0, NewLoopID);
NewLoop->setLoopID(NewLoopID);
@@ -291,23 +298,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Only unroll loops with a computable trip count and the trip count needs
// to be an int value (allowing a pointer type is a TODO item)
- const SCEV *BECount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECountSC) ||
+ !BECountSC->getType()->isIntegerTy())
return false;
- // If BECount is INT_MAX, we can't compute trip-count without overflow.
- if (BECount->isAllOnesValue())
- return false;
+ unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
- SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
- if ((Count & (Count-1)) != 0)
+ if (!isPowerOf2_32(Count))
+ return false;
+
+ // This constraint lets us deal with an overflowing trip count easily; see the
+  // comment on ModVal below. This check is equivalent to `Log2(Count) <=
+  // BEWidth`.
+ if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -315,13 +327,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+ // Grab analyses that we preserve.
+ auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
   // It helps to split the original preheader twice, once for the end of the
   // prolog code and once for a new loop preheader
- BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
// Compute the number of extra iterations required, which is:
@@ -329,16 +345,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
+ Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
+ PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // Check if for no extra iterations, then jump to cloned/unrolled loop.
- // We have to check that the trip count computation didn't overflow when
- // adding one to the backedge taken count.
- Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
- Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
- Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+ // If ModVal is zero, we know that either
+  //  1. there are no iterations to be run in the prologue loop
+ // OR
+ // 2. the addition computing TripCount overflowed
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
+ // number of iterations that remain to be run in the original loop is a
+  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+ // explicitly check this above).
+
+ Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
// Branch to either the extra iterations or the cloned/unrolled loop
// We will fix up the true branch label when adding loop body copies
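The overflow case described above can be checked with a small worked example; the 8-bit width below stands in for BEWidth and the values are invented for illustration.

// Worked example of case (2): the TripCount addition wraps, yet skipping the
// prologue is still correct because the real trip count is a multiple of Count.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t BECount = 255;           // all-ones backedge count: 256 iterations
  const uint8_t TripCount = BECount + 1; // wraps to 0 in 8 bits
  const uint32_t Count = 4;              // unroll factor; Log2(4) == 2 <= BEWidth == 8
  const uint8_t ModVal = TripCount & (Count - 1); // the "xtraiter" value

  // ModVal is 0 even though the add overflowed. That is fine: the real trip
  // count, 1 << 8 == 256, is a multiple of Count, so no prologue is needed.
  assert(TripCount == 0 && ModVal == 0 && 256 % Count == 0);
  return 0;
}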
@@ -361,10 +384,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- // If unroll count is 2 and we can't overflow in tripcount computation (which
- // is BECount + 1), then we don't need a loop for prologue, and we can unroll
- // it. We can be sure that we don't overflow only if tripcount is a constant.
- bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
+ bool UnrollPrologue = Count == 2;
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
@@ -390,8 +410,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
- LPM->getAsPass());
+ ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
+ /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
NumRuntimeUnrolled++;
return true;
}
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
deleted file mode 100644
index ff89e74..0000000
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers the 'expect' intrinsic to LLVM metadata.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "lower-expect-intrinsic"
-
-STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
-
-static cl::opt<uint32_t>
-LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
- cl::desc("Weight of the branch likely to be taken (default = 64)"));
-static cl::opt<uint32_t>
-UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
- cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
-
-namespace {
-
- class LowerExpectIntrinsic : public FunctionPass {
-
- bool HandleSwitchExpect(SwitchInst *SI);
-
- bool HandleIfExpect(BranchInst *BI);
-
- public:
- static char ID;
- LowerExpectIntrinsic() : FunctionPass(ID) {
- initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- };
-}
-
-
-bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
- CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
- if (!CI)
- return false;
-
- Function *Fn = CI->getCalledFunction();
- if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
- return false;
-
- Value *ArgValue = CI->getArgOperand(0);
- ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (!ExpectedValue)
- return false;
-
- SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
- unsigned n = SI->getNumCases(); // +1 for default case.
- std::vector<uint32_t> Weights(n + 1);
-
- Weights[0] = Case == SI->case_default() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
- for (unsigned i = 0; i != n; ++i)
- Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
-
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(CI->getContext()).createBranchWeights(Weights));
-
- SI->setCondition(ArgValue);
- return true;
-}
-
-
-bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
- if (BI->isUnconditional())
- return false;
-
- // Handle non-optimized IR code like:
- // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
- // %tobool = icmp ne i64 %expval, 0
- // br i1 %tobool, label %if.then, label %if.end
- //
- // Or the following simpler case:
- // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)
- // br i1 %expval, label %if.then, label %if.end
-
- CallInst *CI;
-
- ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
- if (!CmpI) {
- CI = dyn_cast<CallInst>(BI->getCondition());
- } else {
- if (CmpI->getPredicate() != CmpInst::ICMP_NE)
- return false;
- CI = dyn_cast<CallInst>(CmpI->getOperand(0));
- }
-
- if (!CI)
- return false;
-
- Function *Fn = CI->getCalledFunction();
- if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
- return false;
-
- Value *ArgValue = CI->getArgOperand(0);
- ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (!ExpectedValue)
- return false;
-
- MDBuilder MDB(CI->getContext());
- MDNode *Node;
-
- // If expect value is equal to 1 it means that we are more likely to take
- // branch 0, in other case more likely is branch 1.
- if (ExpectedValue->isOne())
- Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
- else
- Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
-
- BI->setMetadata(LLVMContext::MD_prof, Node);
-
- if (CmpI)
- CmpI->setOperand(0, ArgValue);
- else
- BI->setCondition(ArgValue);
- return true;
-}
-
-
-bool LowerExpectIntrinsic::runOnFunction(Function &F) {
- for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
-
- // Create "block_weights" metadata.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (HandleIfExpect(BI))
- IfHandled++;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (HandleSwitchExpect(SI))
- IfHandled++;
- }
-
- // remove llvm.expect intrinsics.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ) {
- CallInst *CI = dyn_cast<CallInst>(BI++);
- if (!CI)
- continue;
-
- Function *Fn = CI->getCalledFunction();
- if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
- Value *Exp = CI->getArgOperand(0);
- CI->replaceAllUsesWith(Exp);
- CI->eraseFromParent();
- }
- }
- }
-
- return false;
-}
-
-
-char LowerExpectIntrinsic::ID = 0;
-INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
- "Intrinsics", false, false)
-
-FunctionPass *llvm::createLowerExpectIntrinsicPass() {
- return new LowerExpectIntrinsic();
-}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index a0105c2..b3bdae4 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -32,6 +32,23 @@ using namespace llvm;
#define DEBUG_TYPE "lower-switch"
namespace {
+ struct IntRange {
+ int64_t Low, High;
+ };
+ // Return true iff R is covered by Ranges.
+ static bool IsInRanges(const IntRange &R,
+ const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), R,
+ [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ return I != Ranges.end() && I->Low <= R.Low;
+ }
+
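Since IsInRanges is the linchpin of the unreachable-range handling below, a quick standalone check of its behaviour may help. The helper is copied in spirit from the patch; the concrete ranges and the main function are invented for the example.

// Usage check for the sorted-range coverage test.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct IntRange {
  int64_t Low, High;
};

static bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
  // Ranges must be sorted, non-overlapping and non-adjacent.
  auto I = std::lower_bound(
      Ranges.begin(), Ranges.end(), R,
      [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
  return I != Ranges.end() && I->Low <= R.Low;
}

int main() {
  const std::vector<IntRange> Unreachable = {{3, 4}, {11, INT64_MAX}};
  assert(IsInRanges({3, 4}, Unreachable));   // exactly covers one gap
  assert(IsInRanges({20, 30}, Unreachable)); // inside the open-ended tail
  assert(!IsInRanges({4, 6}, Unreachable));  // straddles reachable values
  return 0;
}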
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions.
class LowerSwitch : public FunctionPass {
@@ -46,18 +63,16 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}
struct CaseRange {
- Constant* Low;
- Constant* High;
+ ConstantInt* Low;
+ ConstantInt* High;
BasicBlock* BB;
- CaseRange(Constant *low = nullptr, Constant *high = nullptr,
- BasicBlock *bb = nullptr) :
- Low(low), High(high), BB(bb) { }
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
};
typedef std::vector<CaseRange> CaseVector;
@@ -68,7 +83,8 @@ namespace {
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default);
+ BasicBlock *OrigBlock, BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges);
BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
@@ -131,25 +147,39 @@ static raw_ostream& operator<<(raw_ostream &O,
return O << "]";
}
-/// \brief Update the first occurrence of the "switch statement" BB in the PHI
-/// node with the "new" BB. The other occurrences will be updated by subsequent
-/// calls to this function.
-///
-/// Switch statements may have more than one incoming edge into the same BB if
-/// they all have the same value. When the switch statement is converted these
-/// incoming edges are now coming from multiple BBs.
-static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB) {
- for (BasicBlock::iterator I = SuccBB->begin(), E = SuccBB->getFirstNonPHI();
- I != E; ++I) {
+// \brief Update the first occurrence of the "switch statement" BB in the PHI
+// node with the "new" BB. The other occurrences will:
+//
+// 1) Be updated by subsequent calls to this function. Switch statements may
+// have more than one outgoing edge to the same BB if the cases all have the
+// same value. When the switch statement is converted, these incoming edges
+// now come from multiple BBs.
+// 2) Be removed if subsequent incoming values now share the same case, i.e.,
+// multiple outgoing edges are condensed into one. This is necessary to keep the
+// number of phi values equal to the number of branches to SuccBB.
+static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ unsigned NumMergedCases) {
+ for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI();
+ I != IE; ++I) {
PHINode *PN = cast<PHINode>(I);
     // Only update the first occurrence.
- for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx) {
+ unsigned Idx = 0, E = PN->getNumIncomingValues();
+ unsigned LocalNumMergedCases = NumMergedCases;
+ for (; Idx != E; ++Idx) {
if (PN->getIncomingBlock(Idx) == OrigBB) {
PN->setIncomingBlock(Idx, NewBB);
break;
}
}
+
+    // Remove additional occurrences coming from condensed cases and keep the
+ // number of incoming values equal to the number of branches to SuccBB.
+ for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ PN->removeIncomingValue(Idx);
+ LocalNumMergedCases--;
+ }
}
}
@@ -158,12 +188,12 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB) {
// LowerBound and UpperBound are used to keep track of the bounds for Val
// that have already been checked by a block emitted by one of the previous
// calls to switchConvert in the call stack.
-BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
+BasicBlock *
+LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
unsigned Size = End - Begin;
if (Size == 1) {
@@ -172,7 +202,11 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
// emitting the code that checks if the value actually falls in the range
// because the bounds already tell us so.
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
- fixPhis(Begin->BB, OrigBlock, Predecessor);
+ unsigned NumMergedCases = 0;
+ if (LowerBound && UpperBound)
+ NumMergedCases =
+ UpperBound->getSExtValue() - LowerBound->getSExtValue();
+ fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
return Begin->BB;
}
return newLeafBlock(*Begin, Val, OrigBlock, Default);
@@ -186,32 +220,32 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
CaseRange &Pivot = *(Begin + Mid);
DEBUG(dbgs() << "Pivot ==> "
- << cast<ConstantInt>(Pivot.Low)->getValue()
- << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+ << Pivot.Low->getValue()
+ << " -" << Pivot.High->getValue() << "\n");
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
// the smallest, so there is always a case range that has at least
// a smaller value.
- ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low);
- ConstantInt *NewUpperBound;
-
- // If we don't have a Default block then it means that we can never
- // have a value outside of a case range, so set the UpperBound to the highest
- // value in the LHS part of the case ranges.
- if (Default != nullptr) {
- // Because NewLowerBound is never the smallest representable integer
- // it is safe here to subtract one.
- NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
- NewLowerBound->getValue() - 1);
- } else {
- CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
- NewUpperBound = cast<ConstantInt>(LastLHS->High);
+ ConstantInt *NewLowerBound = Pivot.Low;
+
+ // Because NewLowerBound is never the smallest representable integer
+ // it is safe here to subtract one.
+ ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+
+ if (!UnreachableRanges.empty()) {
+ // Check if the gap between LHS's highest and NewLowerBound is unreachable.
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1;
+ int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
+ IntRange Gap = { GapLow, GapHigh };
+ if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
+ NewUpperBound = LHS.back().High;
}
DEBUG(dbgs() << "LHS Bounds ==> ";
if (LowerBound) {
- dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
+ dbgs() << LowerBound->getSExtValue();
} else {
dbgs() << "NONE";
}
@@ -219,7 +253,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
dbgs() << "RHS Bounds ==> ";
dbgs() << NewLowerBound->getSExtValue() << " - ";
if (UpperBound) {
- dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
+ dbgs() << UpperBound->getSExtValue() << "\n";
} else {
dbgs() << "NONE\n";
});
@@ -234,10 +268,10 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
NewUpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
UpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
@@ -270,11 +304,11 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
- if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ if (Leaf.Low->isMinValue(true /*isSigned*/)) {
// Val >= Min && Val <= Hi --> Val <= Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
"SwitchLeaf");
- } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ } else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
"SwitchLeaf");
@@ -299,8 +333,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode* PN = cast<PHINode>(I);
// Remove all but one incoming entries from the cluster
- uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
- cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ uint64_t Range = Leaf.High->getSExtValue() -
+ Leaf.Low->getSExtValue();
for (uint64_t j = 0; j < Range; ++j) {
PN->removeIncomingValue(OrigBlock);
}
@@ -328,8 +362,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
if (Cases.size()>=2)
for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
J != Cases.end();) {
- int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
- int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ int64_t nextValue = J->Low->getSExtValue();
+ int64_t currentValue = I->High->getSExtValue();
BasicBlock* nextBB = J->BB;
BasicBlock* currentBB = I->BB;
@@ -362,26 +396,102 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
- // If there is only the default destination, don't bother with the code below.
+ // If there is only the default destination, just branch.
if (!SI->getNumCases()) {
- BranchInst::Create(SI->getDefaultDest(), CurBlock);
- CurBlock->getInstList().erase(SI);
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
return;
}
- const bool DefaultIsUnreachable =
- Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ ConstantInt *LowerBound = nullptr;
+ ConstantInt *UpperBound = nullptr;
+ std::vector<IntRange> UnreachableRanges;
+
+ if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
+    // Make the bounds tightly fitted around the case value range, because we
+ // know that the value passed to the switch must be exactly one of the case
+ // values.
+ assert(!Cases.empty());
+ LowerBound = Cases.front().Low;
+ UpperBound = Cases.back().High;
+
+ DenseMap<BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ BasicBlock *PopSucc = nullptr;
+
+ IntRange R = { INT64_MIN, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ for (const auto &I : Cases) {
+ int64_t Low = I.Low->getSExtValue();
+ int64_t High = I.High->getSExtValue();
+
+ IntRange &LastRange = UnreachableRanges.back();
+ if (LastRange.Low == Low) {
+ // There is nothing left of the previous range.
+ UnreachableRanges.pop_back();
+ } else {
+ // Terminate the previous range.
+ assert(Low > LastRange.Low);
+ LastRange.High = Low - 1;
+ }
+ if (High != INT64_MAX) {
+ IntRange R = { High + 1, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ }
+
+ // Count popularity.
+ int64_t N = High - Low + 1;
+ unsigned &Pop = Popularity[I.BB];
+ if ((Pop += N) > MaxPop) {
+ MaxPop = Pop;
+ PopSucc = I.BB;
+ }
+ }
+#ifndef NDEBUG
+ /* UnreachableRanges should be sorted and the ranges non-adjacent. */
+ for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
+ I != E; ++I) {
+ assert(I->Low <= I->High);
+ auto Next = I + 1;
+ if (Next != E) {
+ assert(Next->Low > I->High);
+ }
+ }
+#endif
+
+ // Use the most popular block as the new default, reducing the number of
+ // cases.
+ assert(MaxPop > 0 && PopSucc);
+ Default = PopSucc;
+ for (CaseItr I = Cases.begin(); I != Cases.end();) {
+ if (I->BB == PopSucc)
+ I = Cases.erase(I);
+ else
+ ++I;
+ }
+
+ // If there are no cases left, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
+ return;
+ }
+ }
+
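A short standalone walk-through of the bookkeeping above, on invented case clusters (the types are simplified and the successors are plain labels, so nothing here is LLVM's API): it shows the unreachable gaps that get recorded and why the most popular successor becomes the new default.

// Walk-through with three clusters: [0,2]->A, [5,5]->B, [6,10]->A.
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct Cluster { int64_t Low, High; std::string BB; };
struct IntRange { int64_t Low, High; };

int main() {
  // Clusters as Clusterify would produce them: sorted and non-overlapping.
  const std::vector<Cluster> Cases = {{0, 2, "A"}, {5, 5, "B"}, {6, 10, "A"}};

  std::vector<IntRange> Unreachable{{INT64_MIN, INT64_MAX}};
  std::map<std::string, int64_t> Popularity;
  for (const Cluster &C : Cases) {
    IntRange &LastRange = Unreachable.back();
    if (LastRange.Low == C.Low)
      Unreachable.pop_back();     // nothing left of the previous gap
    else
      LastRange.High = C.Low - 1; // terminate the previous gap
    if (C.High != INT64_MAX)
      Unreachable.push_back({C.High + 1, INT64_MAX});
    Popularity[C.BB] += C.High - C.Low + 1; // count covered case values
  }

  // Gaps: everything below 0, [3,4], and [11, INT64_MAX]. "A" covers 8 case
  // values against 1 for "B", so "A" becomes the new default and its clusters
  // are dropped from the Cases vector.
  assert(Unreachable.size() == 3);
  assert(Unreachable[1].Low == 3 && Unreachable[1].High == 4);
  assert(Popularity["A"] == 8 && Popularity["B"] == 1);
  return 0;
}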
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- // if the default block is set as an unreachable we avoid creating one
- // because will never be a valid target.
- BasicBlock *NewDefault = nullptr;
- if (!DefaultIsUnreachable) {
- NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
-
- BranchInst::Create(Default, NewDefault);
- }
+ BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+ BranchInst::Create(Default, NewDefault);
+
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@@ -391,40 +501,18 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
}
- // Prepare cases vector.
- CaseVector Cases;
- unsigned numCmps = Clusterify(Cases, SI);
-
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- DEBUG(dbgs() << "Cases: " << Cases << "\n");
- (void)numCmps;
-
- ConstantInt *UpperBound = nullptr;
- ConstantInt *LowerBound = nullptr;
-
- // Optimize the condition where Default is an unreachable block. In this case
- // we can make the bounds tightly fitted around the case value ranges,
- // because we know that the value passed to the switch should always be
- // exactly one of the case values.
- if (DefaultIsUnreachable) {
- CaseItr LastCase = Cases.begin() + Cases.size() - 1;
- UpperBound = cast<ConstantInt>(LastCase->High);
- LowerBound = cast<ConstantInt>(Cases.begin()->Low);
- }
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, OrigBlock, NewDefault);
+ OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
+ BasicBlock *OldDefault = SI->getDefaultDest();
CurBlock->getInstList().erase(SI);
- pred_iterator PI = pred_begin(Default), E = pred_end(Default);
- // If the Default block has no more predecessors just remove it
- if (PI == E) {
- DeleteDeadBlock(Default);
- }
+ // If the Default block has no more predecessors just remove it.
+ if (pred_begin(OldDefault) == pred_end(OldDefault))
+ DeleteDeadBlock(OldDefault);
}
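
For readers following the new unreachable-default handling above, here is a minimal standalone sketch of the two bookkeeping passes it makes over the sorted case clusters: collecting the value gaps that become provably unreachable, and counting which successor covers the most values so it can serve as the new default. The Cluster/Range structs and integer successor ids are illustrative stand-ins, not the pass's real types.

#include <cstdint>
#include <map>
#include <vector>

// Illustrative stand-ins for LowerSwitch's CaseRange and IntRange.
struct Cluster { int64_t Low, High; int Succ; };
struct Range   { int64_t Low, High; };

// Assumes Cases is sorted by Low and non-overlapping, as Clusterify produces.
static int analyzeClusters(const std::vector<Cluster> &Cases,
                           std::vector<Range> &UnreachableRanges) {
  UnreachableRanges.push_back({INT64_MIN, INT64_MAX});
  std::map<int, int64_t> Popularity;
  int64_t MaxPop = 0;
  int PopSucc = -1;
  for (const Cluster &C : Cases) {
    Range &Last = UnreachableRanges.back();
    if (Last.Low == C.Low)
      UnreachableRanges.pop_back();      // nothing left of the previous gap
    else
      Last.High = C.Low - 1;             // terminate the previous gap
    if (C.High != INT64_MAX)
      UnreachableRanges.push_back({C.High + 1, INT64_MAX});
    int64_t &Pop = Popularity[C.Succ];   // popularity == number of values covered
    if ((Pop += C.High - C.Low + 1) > MaxPop) {
      MaxPop = Pop;
      PopSucc = C.Succ;
    }
  }
  return PopSucc;  // the most popular block becomes the new default
}
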
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 477ee7a..00cf4e6 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -14,7 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -39,7 +39,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionTracker>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
@@ -53,7 +53,7 @@ namespace {
char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
@@ -66,7 +66,8 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
while (1) {
Allocas.clear();
@@ -80,7 +81,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, nullptr, AT);
+ PromoteMemToReg(Allocas, DT, nullptr, &AC);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 1fd7071..dabadb7 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -239,7 +239,7 @@ struct PromoteMem2Reg {
AliasSetTracker *AST;
/// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
- AssumptionTracker *AT;
+ AssumptionCache *AC;
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
@@ -282,9 +282,10 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionTracker *AT)
+ AliasSetTracker *AST, AssumptionCache *AC)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent()), AST(AST), AT(AT) {}
+ DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
+ AST(AST), AC(AC) {}
void run();
@@ -415,7 +416,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
+ /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
LBI.deleteValue(DDI);
@@ -498,7 +500,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
+ /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
}
SI->eraseFromParent();
@@ -688,7 +691,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -1068,10 +1071,10 @@ NextIteration:
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionTracker *AT) {
+ AliasSetTracker *AST, AssumptionCache *AC) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, AST, AT).run();
+ PromoteMem2Reg(Allocas, DT, AST, AC).run();
}
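
As a small usage sketch of the updated entry point (assuming the headers of this revision), a caller now hands PromoteMemToReg an AssumptionCache instead of an AssumptionTracker, mirroring the PromotePass loop above:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

// Sketch: promote every promotable alloca in F's entry block.
static bool promoteEntryBlockAllocas(Function &F, DominatorTree &DT,
                                     AssumptionCache &AC) {
  SmallVector<AllocaInst *, 16> Allocas;
  for (Instruction &I : F.getEntryBlock())
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);
  if (Allocas.empty())
    return false;
  PromoteMemToReg(Allocas, DT, /*AST=*/nullptr, &AC);
  return true;
}
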
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 3fcb789..c057b06 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -150,8 +150,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
ProtoName, &BB->front());
// Fill in all the predecessors of the PHI.
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+ for (const auto &PredValue : PredValues)
+ InsertedPHI->addIncoming(PredValue.second, PredValue.first);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
@@ -245,8 +245,7 @@ public:
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
- Preds->push_back(SomePhi->getIncomingBlock(PI));
+ Preds->append(SomePhi->block_begin(), SomePhi->block_end());
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Preds->push_back(*PI);
@@ -344,20 +343,17 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
- }
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
  // Walk the uses in the use-list order to be deterministic.
SmallVector<LoadInst*, 32> LiveInLoads;
DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
@@ -380,8 +376,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Otherwise, check to see if this block is all loads.
bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
+ for (Instruction *I : BlockUses) {
+ if (isa<StoreInst>(I)) {
HasStore = true;
break;
}
@@ -391,8 +387,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
  // efficient way to tell which one is first in the block and don't want to
// scan large blocks, so just add all loads as live ins.
if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ for (Instruction *I : BlockUses)
+ LiveInLoads.push_back(cast<LoadInst>(I));
BlockUses.clear();
continue;
}
@@ -403,8 +399,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// block is a load, then it uses the live in value. The last store defines
// the live out value. We handle this by doing a linear scan of the block.
Value *StoredValue = nullptr;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ for (Instruction &I : *BB) {
+ if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
// If this is a load from an unrelated pointer, ignore it.
if (!isInstInList(L, Insts)) continue;
@@ -419,8 +415,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
}
continue;
}
-
- if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// If this is a store to an unrelated pointer, ignore it.
if (!isInstInList(SI, Insts)) continue;
updateDebugInfo(SI);
@@ -438,8 +434,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Okay, now we rewrite all loads that use live-in values in the loop,
// inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
+ for (LoadInst *ALoad : LiveInLoads) {
Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
replaceLoadWithValue(ALoad, NewVal);
@@ -454,9 +449,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
-
+ for (Instruction *User : Insts) {
// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdate data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
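
For context on the class whose loops are being modernized here: SSAUpdater takes per-block available values and materializes the PHI nodes needed to read the value elsewhere. A hedged minimal sketch of that flow (an assumed call site, not code from this patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

// Sketch: register the value live out of each defining block, then ask for
// the value live into UserBB; SSAUpdater inserts PHI nodes as needed.
static Value *rewriteCrossBlockUse(Type *Ty, StringRef Name,
                                   ArrayRef<std::pair<BasicBlock *, Value *>> Defs,
                                   BasicBlock *UserBB) {
  SSAUpdater SSA;
  SSA.Initialize(Ty, Name);
  for (const auto &Def : Defs)
    SSA.AddAvailableValue(Def.first, Def.second);
  return SSA.GetValueInMiddleOfBlock(UserBB);
}
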
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 92fd56a..3248a83 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -53,9 +53,13 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
static cl::opt<unsigned>
-PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
- cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc("Control the amount of phi node folding to perform (default = 2)"));
static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
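
The "clamp" idiom referred to above is a min followed by a max; once the branches are flattened, each half is a compare feeding a select, which is why a budget of two basic instructions is the smallest useful default:

// Clamp X into [Lo, Hi]: a min followed by a max. Each half costs an icmp
// plus a select after if-conversion, so folding it across a two-entry PHI
// needs a threshold of at least 2.
static int clamp(int X, int Lo, int Hi) {
  int T = X < Hi ? X : Hi; // min(X, Hi)
  return T > Lo ? T : Lo;  // max(T, Lo)
}
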
@@ -73,6 +77,7 @@ STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
+STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
@@ -107,7 +112,7 @@ class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
unsigned BonusInstThreshold;
const DataLayout *const DL;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -127,8 +132,8 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
- const DataLayout *DL, AssumptionTracker *AT)
- : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AT(AT) {}
+ const DataLayout *DL, AssumptionCache *AC)
+ : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {}
bool run(BasicBlock *BB);
};
}
@@ -215,45 +220,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
-/// given instruction, which is assumed to be safe to speculate. 1 means
-/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
-static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
+/// given instruction, which is assumed to be safe to speculate. TCC_Free means
+/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// expensive.
+static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL,
+ const TargetTransformInfo &TTI) {
assert(isSafeToSpeculativelyExecute(I, DL) &&
"Instruction is not safe to speculatively execute!");
- switch (Operator::getOpcode(I)) {
- default:
- // In doubt, be conservative.
- return UINT_MAX;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return UINT_MAX;
- return 1;
- case Instruction::ExtractValue:
- case Instruction::Load:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::BitCast:
- case Instruction::ExtractElement:
- case Instruction::InsertElement:
- return 1; // These are all cheap.
-
- case Instruction::Call:
- case Instruction::Select:
- return 2;
- }
+ return TTI.getUserCost(I);
}
-
/// DominatesMergePoint - If we have a merge point of an "if condition" as
/// accepted above, return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
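
Since ComputeSpeculationCost now returns TargetTransformInfo's TCC_Free/TCC_Basic/TCC_Expensive scale rather than the old 1/2/UINT_MAX scale, the callers below multiply the user-visible thresholds by TCC_Basic. A hedged sketch of that caller-side check (an illustrative wrapper, not a function in this file):

// Sketch: the command-line threshold counts "basic" instructions, while the
// cost model returns TCC_* units, so the two meet at Threshold * TCC_Basic.
static bool cheapEnoughToSpeculate(const User *I, const DataLayout *DL,
                                   const TargetTransformInfo &TTI,
                                   unsigned ThresholdInInsts) {
  unsigned Cost = ComputeSpeculationCost(I, DL, TTI); // == TTI.getUserCost(I)
  return Cost <= ThresholdInInsts * TargetTransformInfo::TCC_Basic;
}
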
@@ -274,7 +249,8 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const DataLayout *DL) {
+ const DataLayout *DL,
+ const TargetTransformInfo &TTI) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -310,7 +286,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!isSafeToSpeculativelyExecute(I, DL))
return false;
- unsigned Cost = ComputeSpeculationCost(I, DL);
+ unsigned Cost = ComputeSpeculationCost(I, DL, TTI);
if (Cost > CostRemaining)
return false;
@@ -320,7 +296,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -383,10 +359,9 @@ struct ConstantComparesGatherer {
}
/// Prevent copy
- ConstantComparesGatherer(const ConstantComparesGatherer &)
- LLVM_DELETED_FUNCTION;
+ ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
ConstantComparesGatherer &
- operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION;
+ operator=(const ConstantComparesGatherer &) = delete;
private:
@@ -712,8 +687,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
if (HasWeight)
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
++MD_i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
- assert(CI);
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
Weights.push_back(CI->getValue().getZExtValue());
}
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
@@ -818,7 +792,7 @@ static void GetBranchWeights(TerminatorInst *TI,
MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
- ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i));
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
Weights.push_back(CI->getValue().getZExtValue());
}
@@ -1079,7 +1053,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
+static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL,
+ const TargetTransformInfo &TTI) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1114,6 +1089,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
if (isa<TerminatorInst>(I1))
goto HoistTerminator;
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return Changed;
+
// For a normal instruction, we just move one to right before the branch,
// then replace all uses of the other with the first. Finally, we remove
// the now redundant second instruction.
@@ -1244,14 +1222,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return false;
// Gather the PHI nodes in BBEnd.
- std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ SmallDenseMap<std::pair<Value *, Value *>, PHINode *> JointValueMap;
Instruction *FirstNonPhiInBBEnd = nullptr;
- for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
- I != E; ++I) {
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
- MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
} else {
FirstNonPhiInBBEnd = &*I;
break;
@@ -1260,13 +1237,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
if (!FirstNonPhiInBBEnd)
return false;
-
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. We scan backward for obviously identical
// instructions in an identical order.
BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
- RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
- RE2 = BB2->getInstList().rend();
+ RE1 = BB1->getInstList().rend(),
+ RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
// Skip debug info.
while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
if (RI1 == RE1)
@@ -1289,6 +1266,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
Instruction *I1 = &*RI1, *I2 = &*RI2;
+ auto InstPair = std::make_pair(I1, I2);
// I1 and I2 should have a single use in the same PHI node, and they
// perform the same operation.
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -1299,11 +1277,11 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
!I1->hasOneUse() || !I2->hasOneUse() ||
- MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
- MapValueFromBB1ToBB2[I1].first != I2)
+ !JointValueMap.count(InstPair))
return Changed;
// Check whether we should swap the operands of ICmpInst.
+  // TODO: Add support for commutativity.
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
bool SwapOpnds = false;
if (ICmp1 && ICmp2 &&
@@ -1324,16 +1302,13 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// with a PHI node after sinking. We only handle the case where there is
// a single pair of different operands.
Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
- unsigned Op1Idx = 0;
+ unsigned Op1Idx = ~0U;
for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
if (I1->getOperand(I) == I2->getOperand(I))
continue;
- // Early exit if we have more-than one pair of different operands or
- // the different operand is already in MapValueFromBB1ToBB2.
- // Early exit if we need a PHI node to replace a constant.
- if (DifferentOp1 ||
- MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
- MapValueFromBB1ToBB2.end() ||
+ // Early exit if we have more-than one pair of different operands or if
+ // we need a PHI node to replace a constant.
+ if (Op1Idx != ~0U ||
isa<Constant>(I1->getOperand(I)) ||
isa<Constant>(I2->getOperand(I))) {
// If we can't sink the instructions, undo the swapping.
@@ -1346,24 +1321,27 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
DifferentOp2 = I2->getOperand(I);
}
- // We insert the pair of different operands to MapValueFromBB1ToBB2 and
- // remove (I1, I2) from MapValueFromBB1ToBB2.
- if (DifferentOp1) {
- PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
- DifferentOp1->getName() + ".sink",
- BBEnd->begin());
- MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
+ DEBUG(dbgs() << " " << *I2 << "\n");
+
+ // We insert the pair of different operands to JointValueMap and
+ // remove (I1, I2) from JointValueMap.
+ if (Op1Idx != ~0U) {
+ auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
+ if (!NewPN) {
+ NewPN =
+ PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink", BBEnd->begin());
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
// I1 should use NewPN instead of DifferentOp1.
I1->setOperand(Op1Idx, NewPN);
- NewPN->addIncoming(DifferentOp1, BB1);
- NewPN->addIncoming(DifferentOp2, BB2);
- DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
}
- PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
- MapValueFromBB1ToBB2.erase(I1);
+ PHINode *OldPN = JointValueMap[InstPair];
+ JointValueMap.erase(InstPair);
- DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
- DEBUG(dbgs() << " " << *I2 << "\n";);
// We need to update RE1 and RE2 if we are going to sink the first
// instruction in the basic block down.
bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
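
Keying the map on the (BB1 value, BB2 value) pair rather than on the BB1 value alone lets several sunk instruction pairs that disagree on the same two operands share one PHI. A standalone sketch of the lookup-or-create step (plain integers stand in for Value pointers):

#include <map>
#include <utility>

using ValueId = int;                       // stand-in for llvm::Value *
struct Phi { ValueId FromBB1, FromBB2; };  // stand-in for llvm::PHINode

// Sketch: one PHI per distinct pair of differing operands; later pairs that
// differ in exactly the same way reuse it instead of creating another.
static Phi *getOrCreateJoinPhi(std::map<std::pair<ValueId, ValueId>, Phi *> &JointValueMap,
                               ValueId DifferentOp1, ValueId DifferentOp2) {
  Phi *&NewPN = JointValueMap[{DifferentOp1, DifferentOp2}];
  if (!NewPN)
    NewPN = new Phi{DifferentOp1, DifferentOp2}; // owned elsewhere in a real pass
  return NewPN;
}
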
@@ -1489,7 +1467,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
///
/// \returns true if the conditional block is removed.
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const DataLayout *DL) {
+ const DataLayout *DL,
+ const TargetTransformInfo &TTI) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -1538,7 +1517,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold)
+ ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold *
+ TargetTransformInfo::TCC_Basic)
return false;
// Store the store speculation candidate.
@@ -1597,9 +1577,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) ||
(OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0;
- if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0;
+ unsigned MaxCost = 2 * PHINodeFoldingThreshold *
+ TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
@@ -1804,7 +1786,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL,
+ const TargetTransformInfo &TTI) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1835,6 +1818,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
+ MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
+ MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -1845,9 +1830,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
- MaxCostVal0, DL) ||
+ MaxCostVal0, DL, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
- MaxCostVal1, DL))
+ MaxCostVal1, DL, TTI))
return false;
}
@@ -2036,8 +2021,10 @@ static bool ExtractBranchMetadata(BranchInst *BI,
"Looking for probabilities on unconditional branch?");
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
- ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
- ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
+ ConstantInt *CITrue =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
+ ConstantInt *CIFalse =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
if (!CITrue || !CIFalse) return false;
ProbTrue = CITrue->getValue().getZExtValue();
ProbFalse = CIFalse->getValue().getZExtValue();
@@ -2534,17 +2521,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// The weight to CommonDest should be PredCommon * SuccTotal +
// PredOther * SuccCommon.
// The weight to OtherDest should be PredOther * SuccOther.
- SmallVector<uint64_t, 2> NewWeights;
- NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
- PredOther * SuccCommon);
- NewWeights.push_back(PredOther * SuccOther);
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon,
+ PredOther * SuccOther};
// Halve the weights if any of them cannot fit in an uint32_t
FitWeights(NewWeights);
- SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
PBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext()).
- createBranchWeights(MDWeights));
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
}
// OtherDest may have phi nodes. If so, add an entry from PBI's
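
As a worked example of the weight math above (illustrative numbers): with PredCommon = 3, PredOther = 1, SuccCommon = 4 and SuccOther = 2, the new weight on the edge to CommonDest is 3 * (4 + 2) + 1 * 4 = 22 and the weight to OtherDest is 1 * 2 = 2. The two sum to (3 + 1) * (4 + 2) = 24, so the relative probabilities of the merged branch stay consistent with the originals before FitWeights halves anything.
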
@@ -2718,7 +2703,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL, AssumptionTracker *AT) {
+ unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2751,7 +2736,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2767,7 +2752,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2947,20 +2932,9 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
- bool InvokeRequiresTableEntry = false;
- bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
-
- if (II->hasFnAttr(Attribute::UWTable)) {
- // Don't remove an `invoke' instruction if the ABI requires an entry into
- // the table.
- InvokeRequiresTableEntry = true;
- continue;
- }
-
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2980,14 +2954,11 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
- Changed = true;
}
- if (!InvokeRequiresTableEntry)
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
-
- return Changed;
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3018,7 +2989,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
}
// If we eliminated all predecessors of the block, delete the block now.
- if (pred_begin(BB) == pred_end(BB))
+ if (pred_empty(BB))
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
@@ -3119,55 +3090,6 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- // If the default value is unreachable, figure out the most popular
- // destination and make it the default.
- if (SI->getDefaultDest() == BB) {
- std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- std::pair<unsigned, unsigned> &entry =
- Popularity[i.getCaseSuccessor()];
- if (entry.first == 0) {
- entry.first = 1;
- entry.second = i.getCaseIndex();
- } else {
- entry.first++;
- }
- }
-
- // Find the most popular block.
- unsigned MaxPop = 0;
- unsigned MaxIndex = 0;
- BasicBlock *MaxBlock = nullptr;
- for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
- I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
- (I->second.first == MaxPop && MaxIndex > I->second.second)) {
- MaxPop = I->second.first;
- MaxIndex = I->second.second;
- MaxBlock = I->first;
- }
- }
- if (MaxBlock) {
- // Make this the new default, allowing us to delete any explicit
- // edges to it.
- SI->setDefaultDest(MaxBlock);
- Changed = true;
-
- // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
- // it.
- if (isa<PHINode>(MaxBlock->begin()))
- for (unsigned i = 0; i != MaxPop-1; ++i)
- MaxBlock->removePredecessor(SI->getParent());
-
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseSuccessor() == MaxBlock) {
- SI->removeCase(i);
- --i; --e;
- }
- }
- }
} else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
// Convert the invoke to a call instruction. This would be a good
@@ -3191,7 +3113,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
// If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB) &&
+ if (pred_empty(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
@@ -3201,70 +3123,122 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
return Changed;
}
-/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
-/// integer range comparison into a sub, an icmp and a branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 1 && "Degenerate switch?");
+static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
+ assert(Cases.size() >= 1);
- // Make sure all cases point to the same destination and gather the values.
- SmallVector<ConstantInt *, 16> Cases;
- SwitchInst::CaseIt I = SI->case_begin();
- Cases.push_back(I.getCaseValue());
- SwitchInst::CaseIt PrevI = I++;
- for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
- if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (size_t I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
return false;
- Cases.push_back(I.getCaseValue());
}
- assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
+ return true;
+}
- // Sort the case values, then check if they form a range we can transform.
- array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
- for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
- if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
- return false;
+/// Turn a switch with two reachable destinations into an integer range
+/// comparison and branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ // Partition the cases into two sets with different destinations.
+ BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
+ BasicBlock *DestB = nullptr;
+  SmallVector<ConstantInt *, 16> CasesA;
+  SmallVector<ConstantInt *, 16> CasesB;
+
+ for (SwitchInst::CaseIt I : SI->cases()) {
+ BasicBlock *Dest = I.getCaseSuccessor();
+ if (!DestA) DestA = Dest;
+ if (Dest == DestA) {
+ CasesA.push_back(I.getCaseValue());
+ continue;
+ }
+ if (!DestB) DestB = Dest;
+ if (Dest == DestB) {
+ CasesB.push_back(I.getCaseValue());
+ continue;
+ }
+ return false; // More than two destinations.
}
- Constant *Offset = ConstantExpr::getNeg(Cases.back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
+ assert(DestA && DestB && "Single-destination switch should have been folded.");
+ assert(DestA != DestB);
+ assert(DestB != SI->getDefaultDest());
+ assert(!CasesB.empty() && "There must be non-default cases.");
+ assert(!CasesA.empty() || HasDefault);
+
+ // Figure out if one of the sets of cases form a contiguous range.
+ SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
+ BasicBlock *ContiguousDest = nullptr;
+ BasicBlock *OtherDest = nullptr;
+ if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
+ ContiguousCases = &CasesA;
+ ContiguousDest = DestA;
+ OtherDest = DestB;
+ } else if (CasesAreContiguous(CasesB)) {
+ ContiguousCases = &CasesB;
+ ContiguousDest = DestB;
+ OtherDest = DestA;
+ } else
+ return false;
+
+ // Start building the compare and branch.
+
+ Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
- Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
+
Value *Cmp;
// If NumCases overflowed, then all possible values jump to the successor.
- if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ if (NumCases->isNullValue() && !ContiguousCases->empty())
Cmp = ConstantInt::getTrue(SI->getContext());
else
Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- BranchInst *NewBI = Builder.CreateCondBr(
- Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
// Update weight for the newly-created conditional branch.
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
+ if (HasBranchWeights(SI)) {
+ SmallVector<uint64_t, 8> Weights;
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
- // Combine all weights for the cases to be the true weight of NewBI.
- // We assume that the sum of all weights for a Terminator can fit into 32
- // bits.
- uint32_t NewTrueWeight = 0;
- for (unsigned I = 1, E = Weights.size(); I != E; ++I)
- NewTrueWeight += (uint32_t)Weights[I];
+ uint64_t TrueWeight = 0;
+ uint64_t FalseWeight = 0;
+ for (size_t I = 0, E = Weights.size(); I != E; ++I) {
+ if (SI->getSuccessor(I) == ContiguousDest)
+ TrueWeight += Weights[I];
+ else
+ FalseWeight += Weights[I];
+ }
+ while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
+ TrueWeight /= 2;
+ FalseWeight /= 2;
+ }
NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).
- createBranchWeights(NewTrueWeight,
- (uint32_t)Weights[0]));
+ MDBuilder(SI->getContext()).createBranchWeights(
+ (uint32_t)TrueWeight, (uint32_t)FalseWeight));
}
}
- // Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
- isa<PHINode>(BBI); ++BBI) {
- for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
+ // Prune obsolete incoming values off the successors' PHI nodes.
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = ContiguousCases->size();
+ if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
+ if (OtherDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
+
+ // Drop the switch.
SI->eraseFromParent();
return true;
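
A standalone sketch of the heart of the rewritten transform: one destination's case values must form a contiguous range, and the dispatch then becomes a single subtract-and-unsigned-compare (plain integers stand in for ConstantInt and the IRBuilder calls):

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: after sorting, the cases are contiguous iff consecutive values
// differ by exactly one.
static bool casesAreContiguous(std::vector<int64_t> Cases) {
  std::sort(Cases.begin(), Cases.end());
  for (size_t I = 1, E = Cases.size(); I != E; ++I)
    if (Cases[I] != Cases[I - 1] + 1)
      return false;
  return true;
}

// Sketch of the emitted check: jump to the contiguous destination iff
// (Cond - Low) is unsigned-less-than the number of contiguous cases.
static bool hitsContiguousDest(int64_t Cond, int64_t Low, uint64_t NumCases) {
  return static_cast<uint64_t>(Cond - Low) < NumCases;
}
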
@@ -3273,11 +3247,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AT, SI);
+ computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
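
The dead-case test that follows the computeKnownBits call (not shown in this hunk) boils down to a bitwise conflict check; a sketch with plain 64-bit masks standing in for the pass's APInts:

#include <cstdint>

// Sketch: a case value can never be taken if it has a 1 in a bit known to be
// zero, or a 0 in a bit known to be one, in the switch condition.
static bool caseIsDead(uint64_t CaseVal, uint64_t KnownZero, uint64_t KnownOne) {
  return (CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne;
}
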
@@ -3484,6 +3458,21 @@ GetCaseResults(SwitchInst *SI,
continue;
} else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
// Instruction is side-effect free and constant.
+
+ // If the instruction has uses outside this block or a phi node slot for
+ // the block, it is not safe to bypass the instruction since it would then
+ // no longer dominate all its uses.
+ for (auto &Use : I->uses()) {
+ User *User = Use.getUser();
+ if (Instruction *I = dyn_cast<Instruction>(User))
+ if (I->getParent() == CaseDest)
+ continue;
+ if (PHINode *Phi = dyn_cast<PHINode>(User))
+ if (Phi->getIncomingBlock(Use) == CaseDest)
+ continue;
+ return false;
+ }
+
ConstantPool.insert(std::make_pair(I, C));
} else {
break;
@@ -3509,12 +3498,6 @@ GetCaseResults(SwitchInst *SI,
if (!ConstVal)
return false;
- // Note: If the constant comes from constant-propagating the case value
- // through the CaseDest basic block, it will be safe to remove the
- // instructions in that block. They cannot be used (except in the phi nodes
- // we visit) outside CaseDest, because that block does not dominate its
- // successor. If it did, we would not be in this phi node.
-
// Be conservative about which kinds of constants we support.
if (!ValidLookupTableConstant(ConstVal))
return false;
@@ -3655,7 +3638,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout *DL, AssumptionTracker *AT) {
+ const DataLayout *DL, AssumptionCache *AC) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
@@ -3982,6 +3965,89 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
return SI->getNumCases() * 10 >= TableSize * 4;
}
+/// Try to reuse the switch table index compare. Following pattern:
+/// \code
+/// if (idx < tablesize)
+/// r = table[idx]; // table does not contain default_value
+/// else
+/// r = default_value;
+/// if (r != default_value)
+/// ...
+/// \endcode
+/// Is optimized to:
+/// \code
+/// cond = idx < tablesize;
+/// if (cond)
+/// r = table[idx];
+/// else
+/// r = default_value;
+/// if (cond)
+/// ...
+/// \endcode
+/// Jump threading will then eliminate the second if(cond).
+static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
+ BranchInst *RangeCheckBranch, Constant *DefaultValue,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values) {
+
+ ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
+ if (!CmpInst)
+ return;
+
+ // We require that the compare is in the same block as the phi so that jump
+ // threading can do its work afterwards.
+ if (CmpInst->getParent() != PhiBlock)
+ return;
+
+ Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
+ if (!CmpOp1)
+ return;
+
+ Value *RangeCmp = RangeCheckBranch->getCondition();
+ Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
+ Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
+
+ // Check if the compare with the default value is constant true or false.
+ Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ DefaultValue, CmpOp1, true);
+ if (DefaultConst != TrueConst && DefaultConst != FalseConst)
+ return;
+
+ // Check if the compare with the case values is distinct from the default
+ // compare result.
+ for (auto ValuePair : Values) {
+ Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ ValuePair.second, CmpOp1, true);
+ if (!CaseConst || CaseConst == DefaultConst)
+ return;
+ assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
+ "Expect true or false as compare result.");
+ }
+
+ // Check if the branch instruction dominates the phi node. It's a simple
+ // dominance check, but sufficient for our needs.
+ // Although this check is invariant in the calling loops, it's better to do it
+ // at this late stage. Practically we do it at most once for a switch.
+ BasicBlock *BranchBlock = RangeCheckBranch->getParent();
+ for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
+ return;
+ }
+
+ if (DefaultConst == FalseConst) {
+ // The compare yields the same result. We can replace it.
+ CmpInst->replaceAllUsesWith(RangeCmp);
+ ++NumTableCmpReuses;
+ } else {
+ // The compare yields the same result, just inverted. We can replace it.
+ Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp,
+ ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
+ RangeCheckBranch);
+ CmpInst->replaceAllUsesWith(InvertedTableCmp);
+ ++NumTableCmpReuses;
+ }
+}
+
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
@@ -4058,11 +4124,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
- bool HasDefaultResults = false;
- if (TableHasHoles) {
- HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
+ bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
&CommonDest, DefaultResultsList, DL);
- }
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
@@ -4102,21 +4165,24 @@ static bool SwitchToLookupTable(SwitchInst *SI,
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
- // If we have a fully covered lookup table, unconditionally branch to the
- // lookup table BB. Otherwise, check if the condition value is within the case
- // range. If it is so, branch to the new BB. Otherwise branch to SI's default
- // destination.
- const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
- if (GeneratingCoveredLookupTable) {
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
+ BranchInst *RangeCheckBranch = nullptr;
+
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
// We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
SI->getDefaultDest()->removePredecessor(SI->getParent(),
- true/*DontDeleteUselessPHIs*/);
+ /*DontDeleteUselessPHIs=*/true);
} else {
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
MinCaseVal->getType(), TableSize));
- Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
}
// Populate the BB that does the lookups.
@@ -4167,11 +4233,11 @@ static bool SwitchToLookupTable(SwitchInst *SI,
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
+ const ResultListTy &ResultList = ResultLists[PHI];
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
- DV, DL);
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL);
Value *Result = Table.BuildLookup(TableIndex, Builder);
@@ -4184,6 +4250,16 @@ static bool SwitchToLookupTable(SwitchInst *SI,
break;
}
+ // Do a small peephole optimization: re-use the switch table compare if
+ // possible.
+ if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
+ BasicBlock *PhiBlock = PHI->getParent();
+ // Search for compare instructions which use the phi.
+ for (auto *User : PHI->users()) {
+ reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
+ }
+ }
+
PHI->addIncoming(Result, LookupBB);
}
@@ -4214,12 +4290,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -4229,25 +4305,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI, DL, AT))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ if (EliminateDeadSwitchCases(SI, DL, AC))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
- if (SwitchToSelect(SI, Builder, DL, AT))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ if (SwitchToSelect(SI, Builder, DL, AC))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
if (SwitchToLookupTable(SI, Builder, TTI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
return false;
}
@@ -4284,7 +4360,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
return Changed;
}
@@ -4309,7 +4385,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
;
if (I->isTerminator() &&
TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI,
- BonusInstThreshold, DL, AT))
+ BonusInstThreshold, DL, AC))
return true;
}
@@ -4318,7 +4394,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
return false;
}
@@ -4333,7 +4409,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -4343,14 +4419,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
}
@@ -4362,7 +4438,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -4370,16 +4446,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ if (HoistThenElseCodeToIf(BI, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -4387,8 +4463,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -4396,14 +4472,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
return false;
}
@@ -4484,7 +4560,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// Remove basic blocks that have no predecessors (except the entry block)...
// or that just have themself as a predecessor. These are unreachable.
- if ((pred_begin(BB) == pred_end(BB) &&
+ if ((pred_empty(BB) &&
BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
DEBUG(dbgs() << "Removing BB: \n" << *BB);
@@ -4515,7 +4591,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, DL);
+ Changed |= FoldTwoEntryPHINode(PN, DL, TTI);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4547,7 +4623,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold,
- const DataLayout *DL, AssumptionTracker *AT) {
- return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AT).run(BB);
+ unsigned BonusInstThreshold, const DataLayout *DL,
+ AssumptionCache *AC) {
+ return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index a4fdd55..6a5d885 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -48,22 +48,15 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
bool Changed;
public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) :
- L(Loop),
- LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
- SE(SE),
- DeadInsts(Dead),
- Changed(false) {
- DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI,
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr)
+ : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -80,6 +73,7 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+ bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
Instruction *splitOverflowIntrinsic(Instruction *IVUser,
const DominatorTree *DT);
@@ -271,6 +265,107 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
+/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
+/// unsigned-overflow. Returns true if anything changed, false otherwise.
+bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
+ Value *IVOperand) {
+
+ // Currently we only handle instructions of the form "add <indvar> <value>"
+ unsigned Op = BO->getOpcode();
+ if (Op != Instruction::Add)
+ return false;
+
+ // If BO is already both nuw and nsw then there is nothing left to do
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
+ return false;
+
+ IntegerType *IT = cast<IntegerType>(IVOperand->getType());
+ Value *OtherOperand = nullptr;
+ if (BO->getOperand(0) == IVOperand) {
+ OtherOperand = BO->getOperand(1);
+ } else {
+ assert(BO->getOperand(1) == IVOperand && "only other use!");
+ OtherOperand = BO->getOperand(0);
+ }
+
+ bool Changed = false;
+ const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand);
+ if (OtherOpSCEV == SE->getCouldNotCompute())
+ return false;
+
+ const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
+ const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
+
+ if (!BO->hasNoSignedWrap()) {
+ // Upgrade the add to an "add nsw" if we can prove that it will never
+ // sign-overflow or sign-underflow.
+
+ const SCEV *SignedMax =
+ SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth()));
+ const SCEV *SignedMin =
+ SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth()));
+
+ // The addition "IVOperand + OtherOp" does not sign-overflow if the result
+ // is sign-representable in 2's complement in the given bit-width.
+ //
+ // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp,
+ // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp].
+ // Everything in [SignedMin, SignedMax + OtherOp] is representable since
+ // SignedMax + OtherOp is at least -1.
+ //
+ // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax -
+ // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax].
+ // Everything in [SignedMin + OtherOp, SignedMax] is representable since
+ // SignedMin + OtherOp is at most -1.
+ //
+ // It follows that for all values of IVOperand in [SignedMin - smin(0,
+ // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is
+ // representable (i.e. there is no sign-overflow).
+
+ const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV);
+ const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta);
+
+ bool NeverSignedOverflows =
+ SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit);
+
+ if (NeverSignedOverflows) {
+ const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV);
+ const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta);
+
+ bool NeverSignedUnderflows =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit);
+ if (NeverSignedUnderflows) {
+ BO->setHasNoSignedWrap(true);
+ Changed = true;
+ }
+ }
+ }
+
+ if (!BO->hasNoUnsignedWrap()) {
+ // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can
+ // prove that it will never unsigned-overflow (i.e. the result will always
+ // be representable in the given bit-width).
+ //
+ // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it
+ // does not produce a carry. "IVOperand + OtherOp" produces no carry iff
+ // IVOperand ULE (UnsignedMax - OtherOp).
+
+ const SCEV *UnsignedMax =
+ SE->getConstant(APInt::getMaxValue(IT->getBitWidth()));
+ const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV);
+
+ bool NeverUnsignedOverflows =
+ SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit);
+
+ if (NeverUnsignedOverflows) {
+ BO->setHasNoUnsignedWrap(true);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
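
As a quick numeric spot-check of the bounds derived in the comments above, assume an 8-bit width and OtherOp = 10 purely to keep the numbers small; the pass itself reasons symbolically through ScalarEvolution, not with concrete values like these.

#include <algorithm>
#include <cstdio>

int main() {
  const int SignedMax = 127, SignedMin = -128, UnsignedMax = 255, OtherOp = 10;
  int UpperLimitNSW = SignedMax - std::max(0, OtherOp);   // 127 - 10 = 117
  int LowerLimitNSW = SignedMin - std::min(0, OtherOp);   // -128 - 0 = -128
  int UpperLimitNUW = UnsignedMax - OtherOp;               // 255 - 10 = 245
  // An induction variable proven to stay in [0, 49] satisfies all three
  // bounds, so "add i8 %iv, 10" could be tagged both nsw and nuw.
  std::printf("%d %d %d\n", UpperLimitNSW, LowerLimitNSW, UpperLimitNUW);
  return 0;
}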
/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
@@ -430,6 +525,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
continue;
}
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) {
+ if (isa<OverflowingBinaryOperator>(BO) &&
+ strengthenOverflowingOperation(BO, IVOperand)) {
+ // re-queue uses of the now modified binary operator and fall
+ // through to the checks that remain.
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ }
+ }
+
CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
if (V && Cast) {
V->visitCast(Cast);
@@ -450,8 +555,8 @@ void IVVisitor::anchor() { }
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
{
- LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 5632095..55a4455 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -18,14 +18,14 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -42,8 +42,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AssumptionTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
/// runOnFunction - Remove instructions that simplify.
@@ -53,8 +53,10 @@ namespace {
const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -71,7 +73,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -104,8 +106,8 @@ namespace {
char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index a39f128..fb1d83f 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,7 +30,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
@@ -116,207 +116,68 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
-//===----------------------------------------------------------------------===//
-// Fortified Library Call Optimizations
-//===----------------------------------------------------------------------===//
-
-static bool isFortifiedCallFoldable(CallInst *CI, unsigned SizeCIOp, unsigned SizeArgOp,
- bool isString) {
- if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
- return true;
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
- if (SizeCI->isAllOnesValue())
- return true;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0)
- return false;
- return SizeCI->getZExtValue() >= Len;
- }
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp)))
- return SizeCI->getZExtValue() >= Arg->getZExtValue();
- }
- return false;
-}
-
-Value *LibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
-
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
-
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
-
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != DL->getIntPtrType(Context))
- return nullptr;
-
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // __strcpy_chk(x,x) -> x
- return Src;
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain strcpy. Otherwise we'll keep our
- // strcpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else {
- // Maybe we can stil fold __strcpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
-
- // This optimization require DataLayout.
- if (!DL)
- return nullptr;
-
- Value *Ret = EmitMemCpyChk(
- Dst, Src, ConstantInt::get(DL->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, DL, TLI);
- return Ret;
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStpCpyChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return nullptr;
+/// \brief Returns whether \p F matches the signature expected for the
+/// string/memory copying library function \p Func.
+/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
+/// Their fortified (_chk) counterparts are also accepted.
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
+ const DataLayout *DL) {
+ FunctionType *FT = F->getFunctionType();
+ LLVMContext &Context = F->getContext();
+ Type *PCharTy = Type::getInt8PtrTy(Context);
+ Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+ unsigned NumParams = FT->getNumParams();
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
- }
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain stpcpy. Otherwise we'll keep our
- // stpcpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else {
- // Maybe we can stil fold __stpcpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
-
- // This optimization require DataLayout.
- if (!DL)
- return nullptr;
-
- Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
- Value *DstEnd =
- B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
- if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
- return nullptr;
- return DstEnd;
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- !FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
+ // All string libfuncs return the same type as the first parameter.
+ if (FT->getReturnType() != FT->getParamType(0))
+ return false;
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret =
- EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
- return Ret;
- }
- return nullptr;
+ switch (Func) {
+ default:
+ llvm_unreachable("Can't check signature for non-string-copy libfunc.");
+ case LibFunc::stpncpy_chk:
+ case LibFunc::strncpy_chk:
+ --NumParams; // fallthrough
+ case LibFunc::stpncpy:
+ case LibFunc::strncpy: {
+ if (NumParams != 3 || FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PCharTy || !FT->getParamType(2)->isIntegerTy())
+ return false;
+ break;
+ }
+ case LibFunc::strcpy_chk:
+ case LibFunc::stpcpy_chk:
+ --NumParams; // fallthrough
+ case LibFunc::stpcpy:
+ case LibFunc::strcpy: {
+ if (NumParams != 2 || FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PCharTy)
+ return false;
+ break;
+ }
+ case LibFunc::memmove_chk:
+ case LibFunc::memcpy_chk:
+ --NumParams; // fallthrough
+ case LibFunc::memmove:
+ case LibFunc::memcpy: {
+ if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != SizeTTy)
+ return false;
+ break;
+ }
+ case LibFunc::memset_chk:
+ --NumParams; // fallthrough
+ case LibFunc::memset: {
+ if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != SizeTTy)
+ return false;
+ break;
+ }
+ }
+ // If this is a fortified libcall, the last parameter is a size_t.
+ if (NumParams == FT->getNumParams() - 1)
+ return FT->getParamType(FT->getNumParams() - 1) == SizeTTy;
+ return true;
}
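
The decrement-and-fallthrough pattern above is easier to follow against the C prototypes it assumes: each fortified _chk variant appends exactly one size_t object-size parameter to the plain form. Declarations are shown for strcpy and memcpy only, as a sketch.

#include <cstddef>

extern "C" {
// Plain variants.
char *strcpy(char *dst, const char *src);
void *memcpy(void *dst, const void *src, size_t n);
// Fortified variants: same signature plus a trailing size_t object size,
// which is what the final "NumParams == FT->getNumParams() - 1" test checks.
char *__strcpy_chk(char *dst, const char *src, size_t dstsize);
void *__memcpy_chk(void *dst, const void *src, size_t n, size_t dstsize);
}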
//===----------------------------------------------------------------------===//
@@ -600,11 +461,8 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Verify the "strcpy" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -631,9 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL))
return nullptr;
// These optimizations require DataLayout.
@@ -665,10 +522,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL))
return nullptr;
Value *Dst = CI->getArgOperand(0);
@@ -976,11 +831,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
if (!DL)
return nullptr;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL))
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -995,11 +846,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
if (!DL)
return nullptr;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL))
return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -1014,11 +861,7 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
if (!DL)
return nullptr;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL))
return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1031,6 +874,28 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
// Math Library Optimizations
//===----------------------------------------------------------------------===//
+/// Return a variant of Val with float type.
+/// Currently this works in two cases: If Val is an FPExtension of a float
+/// value to something bigger, simply return the operand.
+/// If Val is a ConstantFP but can be converted to a float ConstantFP without
+/// loss of precision, do so.
+static Value *valueHasFloatPrecision(Value *Val) {
+ if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
+ Value *Op = Cast->getOperand(0);
+ if (Op->getType()->isFloatTy())
+ return Op;
+ }
+ if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
+ APFloat F = Const->getValueAPF();
+ bool losesInfo;
+ (void)F.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(Const->getContext(), F);
+ }
+ return nullptr;
+}
+
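
A standalone illustration of which constants pass the precision test; the pass itself uses APFloat::convert and its losesInfo flag, so this program is only a sketch of the same idea.

#include <cstdio>

int main() {
  double a = 0.5;  // round-trips through float exactly -> eligible for shrinking
  double b = 0.1;  // nearest float differs from the double value -> rejected
  std::printf("%d %d\n", (double)(float)a == a, (double)(float)b == b); // 1 0
  // Values outside float range (e.g. 1e60) also lose information and are
  // rejected the same way.
  return 0;
}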
//===----------------------------------------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1052,12 +917,11 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
}
// If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy())
+ Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
+ if (V == nullptr)
return nullptr;
// floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
if (Callee->isIntrinsic()) {
Module *M = CI->getParent()->getParent()->getParent();
Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
@@ -1083,21 +947,19 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
return nullptr;
// If this is something like 'fmin((double)floatval1, (double)floatval2)',
- // we convert it to fminf.
- FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
- if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || !Cast2 ||
- !Cast2->getOperand(0)->getType()->isFloatTy())
+ // or fmin(1.0, (double)floatval), then we convert it to fminf.
+ Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0));
+ if (V1 == nullptr)
+ return nullptr;
+ Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1));
+ if (V2 == nullptr)
return nullptr;
// fmin((double)floatval1, (double)floatval2)
- // -> (double)fmin(floatval1, floatval2)
- Value *V = nullptr;
- Value *V1 = Cast1->getOperand(0);
- Value *V2 = Cast2->getOperand(0);
+ // -> (double)fminf(floatval1, floatval2)
// TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
- V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
- Callee->getAttributes());
+ Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+ Callee->getAttributes());
return B.CreateFPExt(V, B.getDoubleTy());
}
@@ -1995,53 +1857,18 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
return false;
}
-Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin())
- return nullptr;
-
+Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
+ IRBuilder<> &Builder) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
- IRBuilder<> Builder(CI);
- bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
-
- // Command-line parameter overrides function attribute.
- if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
- UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (Callee->hasFnAttribute("unsafe-fp-math")) {
- // FIXME: This is the same problem as described in optimizeSqrt().
- // If calls gain access to IR-level FMF, then use that instead of a
- // function attribute.
- // Check for unsafe-fp-math = true.
- Attribute Attr = Callee->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() == "true")
- UnsafeFPShrink = true;
- }
-
- // First, check for intrinsics.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- if (!isCallingConvC)
- return nullptr;
- switch (II->getIntrinsicID()) {
- case Intrinsic::pow:
- return optimizePow(CI, Builder);
- case Intrinsic::exp2:
- return optimizeExp2(CI, Builder);
- case Intrinsic::fabs:
- return optimizeFabs(CI, Builder);
- case Intrinsic::sqrt:
- return optimizeSqrt(CI, Builder);
- default:
- return nullptr;
- }
- }
-
- // Then check for known library functions.
+ // Check for string/memory library functions.
if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
- // We never change the calling convention.
- if (!ignoreCallingConv(Func) && !isCallingConvC)
- return nullptr;
+ // Make sure we never change the calling convention.
+ assert((ignoreCallingConv(Func) ||
+ CI->getCallingConv() == llvm::CallingConv::C) &&
+ "Optimizing string/memory libcall would change the calling convention");
switch (Func) {
case LibFunc::strcat:
return optimizeStrCat(CI, Builder);
@@ -2087,6 +1914,77 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeMemMove(CI, Builder);
case LibFunc::memset:
return optimizeMemSet(CI, Builder);
+ default:
+ break;
+ }
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->isNoBuiltin())
+ return nullptr;
+
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ IRBuilder<> Builder(CI);
+ bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+
+ // Command-line parameter overrides function attribute.
+ if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
+ UnsafeFPShrink = EnableUnsafeFPShrink;
+ else if (Callee->hasFnAttribute("unsafe-fp-math")) {
+ // FIXME: This is the same problem as described in optimizeSqrt().
+ // If calls gain access to IR-level FMF, then use that instead of a
+ // function attribute.
+
+ // Check for unsafe-fp-math = true.
+ Attribute Attr = Callee->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() == "true")
+ UnsafeFPShrink = true;
+ }
+
+ // First, check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (!isCallingConvC)
+ return nullptr;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return optimizePow(CI, Builder);
+ case Intrinsic::exp2:
+ return optimizeExp2(CI, Builder);
+ case Intrinsic::fabs:
+ return optimizeFabs(CI, Builder);
+ case Intrinsic::sqrt:
+ return optimizeSqrt(CI, Builder);
+ default:
+ return nullptr;
+ }
+ }
+
+ // Also try to simplify calls to fortified library functions.
+ if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
+ // Try to further simplify the result.
+ CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
+ if (SimplifiedCI && SimplifiedCI->getCalledFunction())
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
+ // If we were able to further simplify, remove the now redundant call.
+ SimplifiedCI->replaceAllUsesWith(V);
+ SimplifiedCI->eraseFromParent();
+ return V;
+ }
+ return SimplifiedFortifiedCI;
+ }
+
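
Schematically, for a hypothetical call site, the two-stage lowering above proceeds as follows (shown in comments, since the steps are IR rewrites rather than standalone code):

//   __strcpy_chk(dst, "hi", (size_t)-1)      // object size unknown (-1)
// Stage 1, FortifiedSimplifier.optimizeCall: the -1 object size makes the
//   check foldable, so the call is lowered to plain strcpy(dst, "hi"); that
//   strcpy is the SimplifiedCI above.
// Stage 2, optimizeStringMemoryLibCall -> optimizeStrCpy: the source length
//   is a known constant, so the strcpy becomes llvm.memcpy(dst, "hi", 3, 1)
//   and the now-redundant intermediate call is erased.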
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !isCallingConvC)
+ return nullptr;
+ if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
+ return V;
+ switch (Func) {
case LibFunc::cosf:
case LibFunc::cos:
case LibFunc::cosl:
@@ -2177,40 +2075,32 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
+ case LibFunc::copysign:
case LibFunc::fmin:
case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
- case LibFunc::memcpy_chk:
- return optimizeMemCpyChk(CI, Builder);
- case LibFunc::memmove_chk:
- return optimizeMemMoveChk(CI, Builder);
- case LibFunc::memset_chk:
- return optimizeMemSetChk(CI, Builder);
- case LibFunc::strcpy_chk:
- return optimizeStrCpyChk(CI, Builder);
- case LibFunc::stpcpy_chk:
- return optimizeStpCpyChk(CI, Builder);
- case LibFunc::stpncpy_chk:
- case LibFunc::strncpy_chk:
- return optimizeStrNCpyChk(CI, Builder);
default:
return nullptr;
}
}
-
return nullptr;
}
-LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI) :
- DL(DL),
- TLI(TLI),
- UnsafeFPShrink(false) {
+LibCallSimplifier::LibCallSimplifier(
+ const DataLayout *DL, const TargetLibraryInfo *TLI,
+ function_ref<void(Instruction *, Value *)> Replacer)
+ : FortifiedSimplifier(DL, TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
+ Replacer(Replacer) {}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
+ // Indirect through the replacer used in this instance.
+ Replacer(I, With);
}
-void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I,
+ Value *With) {
I->replaceAllUsesWith(With);
I->eraseFromParent();
}
@@ -2262,3 +2152,184 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
// * trunc(cnst) -> cnst'
//
//
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+ unsigned ObjSizeOp,
+ unsigned SizeOp,
+ bool isString) {
+ if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+ return true;
+ if (ConstantInt *ObjSizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
+ if (ObjSizeCI->isAllOnesValue())
+ return true;
+ // If the object size wasn't -1 (unknown), bail out if we were asked to.
+ if (OnlyLowerUnknownSize)
+ return false;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0)
+ return false;
+ return ObjSizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
+ return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+ }
+ return false;
+}
+
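
A few illustrative cases for isFortifiedCallFoldable, using __memcpy_chk as the running example; ObjSizeOp is the trailing object-size argument, SizeOp the requested copy size, and the calls are hypothetical:

//   __memcpy_chk(dst, src, 16, 32)   -> foldable: 32 >= 16, the check cannot fire
//   __memcpy_chk(dst, src, n,  -1)   -> foldable: object size is unknown (-1)
//   __memcpy_chk(dst, src, n,  32)   -> not foldable: n is not a constant
// With OnlyLowerUnknownSize set, only the unknown-size (and equal-operand)
// cases are lowered.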
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL))
+ return nullptr;
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL))
+ return nullptr;
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL))
+ return nullptr;
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
+ IRBuilder<> &B,
+ LibFunc::Func Func) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+
+ if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ return nullptr;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
+ *ObjSize = CI->getArgOperand(2);
+
+ // __stpcpy_chk(x,x,...) -> x+strlen(x)
+ if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ }
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+  // string lengths for varying inputs.
+ if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+ return Ret;
+ } else if (!OnlyLowerUnknownSize) {
+    // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ // This optimization requires DataLayout.
+ if (!DL)
+ return nullptr;
+
+ Type *SizeTTy = DL->getIntPtrType(CI->getContext());
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc::stpcpy_chk)
+ return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return Ret;
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
+ IRBuilder<> &B,
+ LibFunc::Func Func) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+
+ if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ return nullptr;
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Ret =
+ EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ return Ret;
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->isNoBuiltin())
+ return nullptr;
+
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ IRBuilder<> Builder(CI);
+ bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+
+  // First, check that this is a known library function.
+ if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+ return nullptr;
+
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !isCallingConvC)
+ return nullptr;
+
+ switch (Func) {
+ case LibFunc::memcpy_chk:
+ return optimizeMemCpyChk(CI, Builder);
+ case LibFunc::memmove_chk:
+ return optimizeMemMoveChk(CI, Builder);
+ case LibFunc::memset_chk:
+ return optimizeMemSetChk(CI, Builder);
+ case LibFunc::stpcpy_chk:
+ case LibFunc::strcpy_chk:
+ return optimizeStrpCpyChk(CI, Builder, Func);
+ case LibFunc::stpncpy_chk:
+ case LibFunc::strncpy_chk:
+ return optimizeStrpNCpyChk(CI, Builder, Func);
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+FortifiedLibCallSimplifier::
+FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
+ bool OnlyLowerUnknownSize)
+ : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
+}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index aacc945..b343cc4 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,7 +60,7 @@
#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
namespace llvm {
namespace SymbolRewriter {
+void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
+ const std::string &Target) {
+ if (Comdat *CD = GO->getComdat()) {
+ auto &Comdats = M.getComdatSymbolTable();
+
+ Comdat *C = M.getOrInsertComdat(Target);
+ C->setSelectionKind(CD->getSelectionKind());
+ GO->setComdat(C);
+
+ Comdats.erase(Comdats.find(Source));
+ }
+}
+
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType,
bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
bool Changed = false;
if (ValueType *S = (M.*Get)(Source)) {
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
+ rewriteComdat(M, GO, Source, Target);
+
if (Value *T = (M.*Get)(Target))
S->setValueName(T->getValueName());
else
S->setName(Target);
+
Changed = true;
}
return Changed;
@@ -113,7 +130,8 @@ bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const,
- iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()>
+ iterator_range<typename iplist<ValueType>::iterator>
+ (llvm::Module::*Iterator)()>
class PatternRewriteDescriptor : public RewriteDescriptor {
public:
const std::string Pattern;
@@ -131,7 +149,8 @@ public:
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const,
- iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()>
+ iterator_range<typename iplist<ValueType>::iterator>
+ (llvm::Module::*Iterator)()>
bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
performOnModule(Module &M) {
bool Changed = false;
@@ -143,6 +162,12 @@ performOnModule(Module &M) {
report_fatal_error("unable to transforn " + C.getName() + " in " +
M.getModuleIdentifier() + ": " + Error);
+ if (C.getName() == Name)
+ continue;
+
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
+ rewriteComdat(M, GO, C.getName(), Name);
+
if (Value *V = (M.*Get)(Name))
C.setValueName(V->getValueName());
else
@@ -492,7 +517,7 @@ RewriteSymbols::RewriteSymbols() : ModulePass(ID) {
RewriteSymbols::RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL)
: ModulePass(ID) {
- std::swap(Descriptors, DL);
+ Descriptors.splice(Descriptors.begin(), DL);
}
bool RewriteSymbols::runOnModule(Module &M) {
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 0c2fc0a..7e00a80 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
- AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a2f69d1..49c0902 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -40,7 +40,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V) || isa<MDString>(V))
+ if (isa<GlobalValue>(V))
return VM[V] = const_cast<Value*>(V);
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
@@ -56,57 +56,24 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
}
-
- if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) {
+ const Metadata *MD = MDV->getMetadata();
// If this is a module-level metadata and we know that nothing at the module
// level is changing, then use an identity mapping.
- if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
- return VM[V] = const_cast<Value*>(V);
-
- // Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
- VM[V] = Dummy;
-
- // Check all operands to see if any need to be remapped.
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
- Value *OP = MD->getOperand(i);
- if (!OP) continue;
- Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
- // Use identity map if Mapped_Op is null and we can ignore missing
- // entries.
- if (Mapped_OP == OP ||
- (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries)))
- continue;
-
- // Ok, at least one operand needs remapping.
- SmallVector<Value*, 4> Elts;
- Elts.reserve(MD->getNumOperands());
- for (i = 0; i != e; ++i) {
- Value *Op = MD->getOperand(i);
- if (!Op)
- Elts.push_back(nullptr);
- else {
- Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
- // Use identity map if Mapped_Op is null and we can ignore missing
- // entries.
- if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries))
- Mapped_Op = Op;
- Elts.push_back(Mapped_Op);
- }
- }
- MDNode *NewMD = MDNode::get(V->getContext(), Elts);
- Dummy->replaceAllUsesWith(NewMD);
- VM[V] = NewMD;
- MDNode::deleteTemporary(Dummy);
- return NewMD;
- }
+ if (!isa<LocalAsMetadata>(MD) && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value *>(V);
- VM[V] = const_cast<Value*>(V);
- MDNode::deleteTemporary(Dummy);
+ auto *MappedMD = MapMetadata(MD, VM, Flags, TypeMapper, Materializer);
+ if (MD == MappedMD || (!MappedMD && (Flags & RF_IgnoreMissingEntries)))
+ return VM[V] = const_cast<Value *>(V);
- // No operands needed remapping. Use an identity mapping.
- return const_cast<Value*>(V);
+ // FIXME: This assert crashes during bootstrap, but I think it should be
+ // correct. For now, just match behaviour from before the metadata/value
+ // split.
+ //
+ // assert(MappedMD && "Referenced metadata value not in value map");
+ return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD);
}
// Okay, this either must be a constant (which may or may not be mappable) or
@@ -177,6 +144,198 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
+static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key,
+ Metadata *Val) {
+ VM.MD()[Key].reset(Val);
+ return Val;
+}
+
+static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
+ return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));
+}
+
+static Metadata *MapMetadataImpl(const Metadata *MD,
+ SmallVectorImpl<MDNode *> &Cycles,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer);
+
+static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ if (!Op)
+ return nullptr;
+ if (Metadata *MappedOp =
+ MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))
+ return MappedOp;
+ // Use identity map if MappedOp is null and we can ignore missing entries.
+ if (Flags & RF_IgnoreMissingEntries)
+ return Op;
+
+ // FIXME: This assert crashes during bootstrap, but I think it should be
+ // correct. For now, just match behaviour from before the metadata/value
+ // split.
+ //
+ // llvm_unreachable("Referenced metadata not in value map!");
+ return nullptr;
+}
+
+/// \brief Remap nodes.
+///
+/// Insert \c NewNode in the value map, and then remap \c OldNode's operands.
+/// Assumes that \c NewNode is already a clone of \c OldNode.
+///
+/// \pre \c NewNode is a clone of \c OldNode.
+static bool remap(const MDNode *OldNode, MDNode *NewNode,
+ SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
+ "Expected nodes to match");
+ assert(OldNode->isResolved() && "Expected resolved node");
+ assert(!NewNode->isUniqued() && "Expected non-uniqued node");
+
+ // Map the node upfront so it's available for cyclic references.
+ mapToMetadata(VM, OldNode, NewNode);
+ bool AnyChanged = false;
+ for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
+ Metadata *Old = OldNode->getOperand(I);
+ assert(NewNode->getOperand(I) == Old &&
+ "Expected old operands to already be in place");
+
+ Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
+ TypeMapper, Materializer);
+ if (Old != New) {
+ AnyChanged = true;
+ NewNode->replaceOperandWith(I, New);
+ }
+ }
+
+ return AnyChanged;
+}
+
+/// \brief Map a distinct MDNode.
+///
+/// Distinct nodes are not uniqued, so they must always be recreated.
+static Metadata *mapDistinctNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(Node->isDistinct() && "Expected distinct node");
+
+ MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
+ remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
+
+ // Track any cycles beneath this node.
+ for (Metadata *Op : NewMD->operands())
+ if (auto *Node = dyn_cast_or_null<MDNode>(Op))
+ if (!Node->isResolved())
+ Cycles.push_back(Node);
+
+ return NewMD;
+}
+
+/// \brief Map a uniqued MDNode.
+///
+/// Uniqued nodes may not need to be recreated (they may map to themselves).
+static Metadata *mapUniquedNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(Node->isUniqued() && "Expected uniqued node");
+
+ // Create a temporary node upfront in case we have a metadata cycle.
+ auto ClonedMD = Node->clone();
+ if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
+ // No operands changed, so use the identity mapping.
+ return mapToSelf(VM, Node);
+
+ // At least one operand has changed, so uniquify the cloned node.
+ return mapToMetadata(VM, Node,
+ MDNode::replaceWithUniqued(std::move(ClonedMD)));
+}
+
+static Metadata *MapMetadataImpl(const Metadata *MD,
+ SmallVectorImpl<MDNode *> &Cycles,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ // If the value already exists in the map, use it.
+ if (Metadata *NewMD = VM.MD().lookup(MD).get())
+ return NewMD;
+
+ if (isa<MDString>(MD))
+ return mapToSelf(VM, MD);
+
+ if (isa<ConstantAsMetadata>(MD))
+ if ((Flags & RF_NoModuleLevelChanges))
+ return mapToSelf(VM, MD);
+
+ if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) {
+ Value *MappedV =
+ MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer);
+ if (VMD->getValue() == MappedV ||
+ (!MappedV && (Flags & RF_IgnoreMissingEntries)))
+ return mapToSelf(VM, MD);
+
+ // FIXME: This assert crashes during bootstrap, but I think it should be
+ // correct. For now, just match behaviour from before the metadata/value
+ // split.
+ //
+ // assert(MappedV && "Referenced metadata not in value map!");
+ if (MappedV)
+ return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV));
+ return nullptr;
+ }
+
+ const MDNode *Node = cast<MDNode>(MD);
+ assert(Node->isResolved() && "Unexpected unresolved node");
+
+ // If this is a module-level metadata and we know that nothing at the
+ // module level is changing, then use an identity mapping.
+ if (Flags & RF_NoModuleLevelChanges)
+ return mapToSelf(VM, MD);
+
+ if (Node->isDistinct())
+ return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+
+ return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+}
+
+Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ SmallVector<MDNode *, 8> Cycles;
+ Metadata *NewMD =
+ MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
+
+ // Resolve cycles underneath MD.
+ if (NewMD && NewMD != MD) {
+ if (auto *N = dyn_cast<MDNode>(NewMD))
+ if (!N->isResolved())
+ N->resolveCycles();
+
+ for (MDNode *N : Cycles)
+ if (!N->isResolved())
+ N->resolveCycles();
+ } else {
+ // Shouldn't get unresolved cycles if nothing was remapped.
+ assert(Cycles.empty() && "Expected no unresolved cycles");
+ }
+
+ return NewMD;
+}
+
+MDNode *llvm::MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ return cast<MDNode>(MapMetadata(static_cast<const Metadata *>(MD), VM, Flags,
+ TypeMapper, Materializer));
+}
+
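
To see why the cycle bookkeeping above is needed, consider a self-referential metadata graph, written in IR syntax inside comments only (the nodes are built programmatically in practice):

//   !0 = distinct !{!1}
//   !1 = !{!0}
// MapMetadataImpl installs each clone in the value map before visiting its
// operands, so the back-reference from the clone of !1 finds the new !0
// rather than recursing forever; nodes left temporarily unresolved are then
// fixed up by the resolveCycles() calls in MapMetadata above.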
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
@@ -215,7 +374,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
ME = MDs.end();
MI != ME; ++MI) {
MDNode *Old = MI->second;
- MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
+ MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
I->setMetadata(MI->first, New);
}