author     Stephen Hines <srhines@google.com>    2014-04-23 16:57:46 -0700
committer  Stephen Hines <srhines@google.com>    2014-04-24 15:53:16 -0700
commit     36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree       e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Transforms/Utils
parent     69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--  lib/Transforms/Utils/ASanStackFrameLayout.cpp      114
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp         217
-rw-r--r--  lib/Transforms/Utils/Android.mk                      4
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp            89
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp         90
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp              46
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt                  2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp             100
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp              28
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp            4
-rw-r--r--  lib/Transforms/Utils/FlattenCFG.cpp                  2
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp               20
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp             84
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp            4
-rw-r--r--  lib/Transforms/Utils/IntegerDivision.cpp           244
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp                     367
-rw-r--r--  lib/Transforms/Utils/Local.cpp                      67
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp              800
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp                 38
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp       25
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp               518
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp                 9
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp                    12
-rw-r--r--  lib/Transforms/Utils/MetaRenamer.cpp                 4
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp    42
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp                  4
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp               310
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp             94
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp       20
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp          446
-rw-r--r--  lib/Transforms/Utils/SpecialCaseList.cpp             9
-rw-r--r--  lib/Transforms/Utils/Utils.cpp                       3
32 files changed, 2127 insertions, 1689 deletions
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
new file mode 100644
index 0000000..cce016a
--- /dev/null
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -0,0 +1,114 @@
+//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace llvm {
+
+// We sort the stack variables by alignment (largest first) to minimize
+// unnecessary large gaps due to alignment.
+// It is tempting to also sort variables by size so that larger variables
+// have larger redzones at both ends. But reordering will make report analysis
+// harder, especially when temporary unnamed variables are present.
+// So, until we can provide more information (type, line number, etc)
+// for the stack variables, we avoid reordering them too much.
+static inline bool CompareVars(const ASanStackVariableDescription &a,
+ const ASanStackVariableDescription &b) {
+ return a.Alignment > b.Alignment;
+}
+
+// We also force minimal alignment for all vars to kMinAlignment so that vars
+// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
+static const size_t kMinAlignment = 16;
+
+static size_t RoundUpTo(size_t X, size_t RoundTo) {
+ assert((RoundTo & (RoundTo - 1)) == 0);
+ return (X + RoundTo - 1) & ~(RoundTo - 1);
+}
+
+// The larger the variable Size, the larger the redzone.
+// The resulting frame size is a multiple of Alignment.
+static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+ size_t Res = 0;
+ if (Size <= 4) Res = 16;
+ else if (Size <= 16) Res = 32;
+ else if (Size <= 128) Res = Size + 32;
+ else if (Size <= 512) Res = Size + 64;
+ else if (Size <= 4096) Res = Size + 128;
+ else Res = Size + 256;
+ return RoundUpTo(Res, Alignment);
+}
+
+void
+ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ size_t Granularity, size_t MinHeaderSize,
+ ASanStackFrameLayout *Layout) {
+ assert(Granularity >= 8 && Granularity <= 64 &&
+ (Granularity & (Granularity - 1)) == 0);
+ assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
+ MinHeaderSize >= Granularity);
+ size_t NumVars = Vars.size();
+ assert(NumVars > 0);
+ for (size_t i = 0; i < NumVars; i++)
+ Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
+
+ std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << NumVars;
+ Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment);
+ SmallVector<uint8_t, 64> &SB(Layout->ShadowBytes);
+ SB.clear();
+ size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
+ Vars[0].Alignment);
+ assert((Offset % Granularity) == 0);
+ SB.insert(SB.end(), Offset / Granularity, kAsanStackLeftRedzoneMagic);
+ for (size_t i = 0; i < NumVars; i++) {
+ bool IsLast = i == NumVars - 1;
+ size_t Alignment = std::max(Granularity, Vars[i].Alignment);
+ (void)Alignment; // Used only in asserts.
+ size_t Size = Vars[i].Size;
+ const char *Name = Vars[i].Name;
+ assert((Alignment & (Alignment - 1)) == 0);
+ assert(Layout->FrameAlignment >= Alignment);
+ assert((Offset % Alignment) == 0);
+ assert(Size > 0);
+ StackDescription << " " << Offset << " " << Size << " " << strlen(Name)
+ << " " << Name;
+ size_t NextAlignment = IsLast ? Granularity
+ : std::max(Granularity, Vars[i + 1].Alignment);
+ size_t SizeWithRedzone = VarAndRedzoneSize(Vars[i].Size, NextAlignment);
+ SB.insert(SB.end(), Size / Granularity, 0);
+ if (Size % Granularity)
+ SB.insert(SB.end(), Size % Granularity);
+ SB.insert(SB.end(), (SizeWithRedzone - Size) / Granularity,
+ IsLast ? kAsanStackRightRedzoneMagic
+ : kAsanStackMidRedzoneMagic);
+ Vars[i].Offset = Offset;
+ Offset += SizeWithRedzone;
+ }
+ if (Offset % MinHeaderSize) {
+ size_t ExtraRedzone = MinHeaderSize - (Offset % MinHeaderSize);
+ SB.insert(SB.end(), ExtraRedzone / Granularity,
+ kAsanStackRightRedzoneMagic);
+ Offset += ExtraRedzone;
+ }
+ Layout->DescriptionString = StackDescription.str();
+ Layout->FrameSize = Offset;
+ assert((Layout->FrameSize % MinHeaderSize) == 0);
+ assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size());
+}
+
+} // llvm namespace
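The redzone sizing rule in VarAndRedzoneSize above is easy to sanity-check outside of LLVM. A minimal standalone mirror of the function (plain C++, no LLVM dependencies; the sizes asserted below are worked examples, not values taken from the patch):

#include <cassert>
#include <cstddef>

// Mirror of RoundUpTo/VarAndRedzoneSize from the patch above.
static std::size_t RoundUpTo(std::size_t X, std::size_t RoundTo) {
  return (X + RoundTo - 1) & ~(RoundTo - 1); // RoundTo must be a power of 2
}

static std::size_t VarAndRedzoneSize(std::size_t Size, std::size_t Alignment) {
  std::size_t Res = 0;
  if (Size <= 4) Res = 16;
  else if (Size <= 16) Res = 32;
  else if (Size <= 128) Res = Size + 32;
  else if (Size <= 512) Res = Size + 64;
  else if (Size <= 4096) Res = Size + 128;
  else Res = Size + 256;
  return RoundUpTo(Res, Alignment);
}

int main() {
  assert(VarAndRedzoneSize(4, 16) == 16);      // tiny var: a single 16-byte granule
  assert(VarAndRedzoneSize(100, 16) == 144);   // 100 + 32 = 132, rounded up to 144
  assert(VarAndRedzoneSize(1000, 32) == 1152); // 1000 + 128 = 1128, rounded up to 1152
  return 0;
}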
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
new file mode 100644
index 0000000..f42635e
--- /dev/null
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -0,0 +1,217 @@
+//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file adds DWARF discriminators to the IR. Path discriminators are
+// used to decide what CFG path was taken inside sub-graphs whose instructions
+// share the same line and column number information.
+//
+// The main user of this is the sample profiler. Instruction samples are
+// mapped to line number information. Since a single line may be spread
+// out over several basic blocks, discriminators add more precise location
+// information for the samples.
+//
+// For example,
+//
+// 1 #define ASSERT(P)
+// 2 if (!(P))
+// 3 abort()
+// ...
+// 100 while (true) {
+// 101 ASSERT (sum < 0);
+// 102 ...
+// 130 }
+//
+// when converted to IR, this snippet looks something like:
+//
+// while.body: ; preds = %entry, %if.end
+// %0 = load i32* %sum, align 4, !dbg !15
+// %cmp = icmp slt i32 %0, 0, !dbg !15
+// br i1 %cmp, label %if.end, label %if.then, !dbg !15
+//
+// if.then: ; preds = %while.body
+// call void @abort(), !dbg !15
+// br label %if.end, !dbg !15
+//
+// Notice that all the instructions in blocks 'while.body' and 'if.then'
+// have exactly the same debug information. When this program is sampled
+// at runtime, the profiler will assume that all these instructions are
+// equally frequent. This, in turn, will make it consider the edge
+// while.body->if.then to be frequently taken (which is incorrect).
+//
+// By adding a discriminator value to the instructions in block 'if.then',
+// we can distinguish instructions at line 101 with discriminator 0 from
+// the instructions at line 101 with discriminator 1.
+//
+// For more details about DWARF discriminators, please visit
+// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "add-discriminators"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct AddDiscriminators : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ AddDiscriminators() : FunctionPass(ID) {
+ initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ };
+}
+
+char AddDiscriminators::ID = 0;
+INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool>
+NoDiscriminators("no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
+
+FunctionPass *llvm::createAddDiscriminatorsPass() {
+ return new AddDiscriminators();
+}
+
+static bool hasDebugInfo(const Function &F) {
+ NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
+ return CUNodes != 0;
+}
+
+/// \brief Assign DWARF discriminators.
+///
+/// To assign discriminators, we examine the boundaries of every
+/// basic block and its successors. Suppose there is a basic block B1
+/// with successor B2. The last instruction I1 in B1 and the first
+/// instruction I2 in B2 are located at the same file and line number.
+/// This situation is illustrated in the following code snippet:
+///
+/// if (i < 10) x = i;
+///
+/// entry:
+/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
+/// if.then:
+/// %1 = load i32* %i.addr, align 4, !dbg !10
+/// store i32 %1, i32* %x, align 4, !dbg !10
+/// br label %if.end, !dbg !10
+/// if.end:
+/// ret void, !dbg !12
+///
+/// Notice how the branch instruction in block 'entry' and all the
+/// instructions in block 'if.then' have the exact same debug location
+/// information (!dbg !10).
+///
+/// To distinguish instructions in block 'entry' from instructions in
+/// block 'if.then', we generate a new lexical block for all the
+/// instructions in block 'if.then' that share the same file and line
+/// location with the last instruction of block 'entry'.
+///
+/// This new lexical block will have the same location information as
+/// the previous one, but with a new DWARF discriminator value.
+///
+/// One of the main uses of this discriminator value is in runtime
+/// sample profilers. It allows the profiler to distinguish instructions
+/// at location !dbg !10 that execute on different basic blocks. This is
+/// important because while the predicate 'if (x < 10)' may have been
+/// executed millions of times, the assignment 'x = i' may have only
+/// executed a handful of times (meaning that the entry->if.then edge is
+/// seldom taken).
+///
+/// If we did not have discriminator information, the profiler would
+/// assign the same weight to both blocks 'entry' and 'if.then', which
+/// in turn will make it conclude that the entry->if.then edge is very
+/// hot.
+///
+/// To decide where to create new discriminator values, this function
+/// traverses the CFG and examines instructions at basic block boundaries.
+/// If the last instruction I1 of a block B1 is at the same file and line
+/// location as instruction I2 of successor B2, then it creates a new
+/// lexical block for I2 and all the instructions in B2 that share the same
+/// file and line location as I2. This new lexical block will have a
+/// different discriminator number than I1.
+bool AddDiscriminators::runOnFunction(Function &F) {
+ // No need to do anything if there is no debug info for this function.
+ // If the function has debug information, but the user has disabled
+ // discriminators, do nothing.
+ if (!hasDebugInfo(F) || NoDiscriminators) return false;
+
+ bool Changed = false;
+ Module *M = F.getParent();
+ LLVMContext &Ctx = M->getContext();
+ DIBuilder Builder(*M);
+
+ // Traverse all the blocks looking for instructions in different
+ // blocks that are at the same file:line location.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B = I;
+ TerminatorInst *Last = B->getTerminator();
+ DebugLoc LastLoc = Last->getDebugLoc();
+ if (LastLoc.isUnknown()) continue;
+ DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+
+ for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = Last->getSuccessor(I);
+ Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
+ DebugLoc FirstLoc = First->getDebugLoc();
+ if (FirstLoc.isUnknown()) continue;
+ DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+
+ // If the first instruction (First) of Succ is at the same file
+ // location as B's last instruction (Last), add a new
+ // discriminator for First's location and all the instructions
+ // in Succ that share the same location with First.
+ if (FirstDIL.atSameLineAs(LastDIL)) {
+ // Create a new lexical scope and compute a new discriminator
+ // number for it.
+ StringRef Filename = FirstDIL.getFilename();
+ unsigned LineNumber = FirstDIL.getLineNumber();
+ unsigned ColumnNumber = FirstDIL.getColumnNumber();
+ DIScope Scope = FirstDIL.getScope();
+ DIFile File = Builder.createFile(Filename, Scope.getDirectory());
+ unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
+ DILexicalBlock NewScope = Builder.createLexicalBlock(
+ Scope, File, LineNumber, ColumnNumber, Discriminator);
+ DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
+ DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
+
+ // Attach this new debug location to First and every
+ // instruction following First that shares the same location.
+ for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
+ ++I1) {
+ if (I1->getDebugLoc() != FirstLoc) break;
+ I1->setDebugLoc(newDebugLoc);
+ DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
+ << ":" << NewDIL.getColumnNumber() << ":"
+ << NewDIL.getDiscriminator() << *I1 << "\n");
+ }
+ DEBUG(dbgs() << "\n");
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
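For orientation, a sketch of how a client might schedule the new pass with the legacy pass manager of this LLVM era; the wrapper name is hypothetical, and the module M is presumed to have been parsed already with debug info attached:

#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

// Run the discriminator pass over M. Per runOnFunction above, this is a
// no-op when the module lacks llvm.dbg.cu or -no-discriminators is set.
void runAddDiscriminators(llvm::Module &M) {
  llvm::PassManager PM; // legacy pass manager of this era
  PM.add(llvm::createAddDiscriminatorsPass());
  PM.run(M);
}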
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index 73bb3bf..ab4d8a8 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -1,6 +1,8 @@
LOCAL_PATH:= $(call my-dir)
transforms_utils_SRC_FILES := \
+ AddDiscriminators.cpp \
+ ASanStackFrameLayout.cpp \
BasicBlockUtils.cpp \
BreakCriticalEdges.cpp \
BuildLibCalls.cpp \
@@ -50,6 +52,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES)
@@ -60,3 +63,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 12de9ee..b3cd5ce 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -15,17 +15,17 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -167,15 +167,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Finally, erase the old block and update dominator info.
if (P) {
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(PredBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
+ if (DomTreeNode *DTN = DT.getNode(BB)) {
+ DomTreeNode *PredDTN = DT.getNode(PredBB);
SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
- DT->changeImmediateDominator(*DI, PredDTN);
+ DT.changeImmediateDominator(*DI, PredDTN);
- DT->eraseNode(BB);
+ DT.eraseNode(BB);
}
if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
@@ -280,18 +282,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ if (DomTreeNode *OldNode = DT.getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
I != E; ++I)
Children.push_back(*I);
- DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ DomTreeNode *NewNode = DT.addNewBlock(New, Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
- DT->changeImmediateDominator(*I, NewNode);
+ DT.changeImmediateDominator(*I, NewNode);
}
}
@@ -336,9 +340,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
// Update dominator tree if available.
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- if (DT)
- DT->splitBlock(NewBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTWP->getDomTree().splitBlock(NewBB);
if (!L) return;
@@ -630,28 +634,29 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
/// SplitBlockAndInsertIfThen - Split the containing block at the
-/// specified instruction - everything before and including Cmp stays
-/// in the old basic block, and everything after Cmp is moved to a
+/// specified instruction - everything before and including SplitBefore stays
+/// in the old basic block, and everything after SplitBefore is moved to a
/// new block. The two blocks are connected by a conditional branch
/// (with value of Cmp being the condition).
/// Before:
/// Head
-/// Cmp
+/// SplitBefore
/// Tail
/// After:
/// Head
-/// Cmp
-/// if (Cmp)
+/// if (Cond)
/// ThenBlock
+/// SplitBefore
/// Tail
///
/// If Unreachable is true, then ThenBlock ends with
/// UnreachableInst, otherwise it branches to Tail.
/// Returns the NewBasicBlock's terminator.
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
- bool Unreachable, MDNode *BranchWeights) {
- Instruction *SplitBefore = Cmp->getNextNode();
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
@@ -662,13 +667,51 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
CheckTerm = new UnreachableInst(C, ThenBlock);
else
CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
+ HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
return CheckTerm;
}
+/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
+/// but also creates the ElseBlock.
+/// Before:
+/// Head
+/// SplitBefore
+/// Tail
+/// After:
+/// Head
+/// if (Cond)
+/// ThenBlock
+/// else
+/// ElseBlock
+/// SplitBefore
+/// Tail
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+ TerminatorInst **ThenTerm,
+ TerminatorInst **ElseTerm,
+ MDNode *BranchWeights) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ *ThenTerm = BranchInst::Create(Tail, ThenBlock);
+ (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ *ElseTerm = BranchInst::Create(Tail, ElseBlock);
+ (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
+ HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+}
+
+
/// GetIfCondition - Given a basic block (BB) with two predecessors,
/// check to see if the merge at this block is due
/// to an "if condition". If so, return the boolean condition that determines
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 0e7f7f7..76ebb9f 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -20,12 +20,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -39,10 +39,10 @@ namespace {
initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
// No loop canonicalization guarantees are broken by this pass.
@@ -209,7 +209,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we don't have a pass object, we can't update anything...
if (P == 0) return NewBB;
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
// If we have nothing to update, just return.
@@ -297,9 +299,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
P->addBasicBlockToLoop(NewBB, LI->getBase());
}
}
- // If TIBB is in a loop and DestBB is outside of that loop, split the
- // other exit blocks of the loop that also have predecessors outside
- // the loop, to maintain a LoopSimplify guarantee.
+ // If TIBB is in a loop and DestBB is outside of that loop, we may need
+ // to update LoopSimplify form and LCSSA form.
if (!TIL->contains(DestBB) &&
P->mustPreserveAnalysisID(LoopSimplifyID)) {
assert(!TIL->contains(NewBB) &&
@@ -309,50 +310,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (P->mustPreserveAnalysisID(LCSSAID))
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
- // For each unique exit block...
- // FIXME: This code is functionally equivalent to the corresponding
- // loop in LoopSimplify.
- SmallVector<BasicBlock *, 4> ExitBlocks;
- TIL->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- // Collect all the preds that are inside the loop, and note
- // whether there are any preds outside the loop.
- SmallVector<BasicBlock *, 4> Preds;
- bool HasPredOutsideOfLoop = false;
- BasicBlock *Exit = ExitBlocks[i];
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
- I != E; ++I) {
- BasicBlock *P = *I;
- if (TIL->contains(P)) {
- if (isa<IndirectBrInst>(P->getTerminator())) {
- Preds.clear();
- break;
- }
- Preds.push_back(P);
- } else {
- HasPredOutsideOfLoop = true;
- }
- }
- // If there are any preds not in the loop, we'll need to split
- // the edges. The Preds.empty() check is needed because a block
- // may appear multiple times in the list. We can't use
- // getUniqueExitBlocks above because that depends on LoopSimplify
- // form, which we're in the process of restoring!
- if (!Preds.empty() && HasPredOutsideOfLoop) {
- if (!Exit->isLandingPad()) {
- BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds, "split", P);
- if (P->mustPreserveAnalysisID(LCSSAID))
- createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
- } else if (SplitLandingPads) {
- SmallVector<BasicBlock*, 8> NewBBs;
- SplitLandingPadPredecessors(Exit, Preds,
- ".split1", ".split2",
- P, NewBBs);
- if (P->mustPreserveAnalysisID(LCSSAID))
- createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
- }
+ // The only way that we can break LoopSimplify form by splitting a critical
+ // edge is if after the split there exists some edge from TIL to DestBB
+ // *and* the only edge into DestBB from outside of TIL is that of
+ // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
+ // is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then DestBB was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in TIL, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
+ ++I) {
+ BasicBlock *P = *I;
+ if (P == NewBB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != TIL) {
+ // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear();
+ break;
}
+ LoopPreds.push_back(P);
+ }
+ if (!LoopPreds.empty()) {
+ assert(!DestBB->isLandingPad() &&
+ "We don't split edges to landing pads!");
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(DestBB, LoopPreds, "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
// LCSSA form was updated above for the case where LoopSimplify is
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 6d13217..82384a1 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -286,6 +286,21 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return CI;
}
+/// Append a suffix to the function name according to the type of 'Op'.
+static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) {
+ if (!Op->getType()->isDoubleTy()) {
+ NameBuffer += Name;
+
+ if (Op->getType()->isFloatTy())
+ NameBuffer += 'f';
+ else
+ NameBuffer += 'l';
+
+ Name = NameBuffer;
+ }
+ return;
+}
+
/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
/// 'floor'). This function is known to take a single argument of type matching
/// 'Op' and return one value with the same type. If 'Op' is a long double, 'l' is
@@ -293,15 +308,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttributeSet &Attrs) {
SmallString<20> NameBuffer;
- if (!Op->getType()->isDoubleTy()) {
- // If we need to add a suffix, copy into NameBuffer.
- NameBuffer += Name;
- if (Op->getType()->isFloatTy())
- NameBuffer += 'f'; // floorf
- else
- NameBuffer += 'l'; // floorl
- Name = NameBuffer;
- }
+ AppendTypeSuffix(Op, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
@@ -314,6 +321,27 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
+/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name'
+/// (e.g. 'fmin'). This function is known to take arguments of type matching
+/// 'Op1' and 'Op2' and return one value with the same type. If 'Op1/Op2' is a
+/// long double, 'l' is added as the suffix of the name; if 'Op1/Op2' is a
+/// float, we add an 'f' suffix.
+Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+ IRBuilder<> &B, const AttributeSet &Attrs) {
+ SmallString<20> NameBuffer;
+ AppendTypeSuffix(Op1, Name, NameBuffer);
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
+ Op1->getType(), Op2->getType(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
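The factored-out AppendTypeSuffix encodes the C math-library naming convention (floor/floorf/floorl, fmin/fminf/fminl). A string-only sketch of that convention, with no LLVM types (illustrative only):

#include <cassert>
#include <string>

// Mirrors AppendTypeSuffix: double keeps the plain name, float appends
// 'f', long double appends 'l'.
static std::string suffixedName(const std::string &Name, char Kind) {
  if (Kind == 'd') return Name;       // double: e.g. "floor", "fmin"
  if (Kind == 'f') return Name + "f"; // float: "floorf", "fminf"
  return Name + "l";                  // long double: "floorl", "fminl"
}

int main() {
  assert(suffixedName("floor", 'd') == "floor");
  assert(suffixedName("floor", 'f') == "floorf");
  assert(suffixedName("fmin", 'l') == "fminl");
  return 0;
}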
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5afd6b8..dac2090 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,6 @@
add_llvm_library(LLVMTransformUtils
+ AddDiscriminators.cpp
+ ASanStackFrameLayout.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d105f5e..a199086 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -17,8 +17,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -26,7 +27,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -88,26 +89,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
+ // Copy all attributes other than those stored in the AttributeSet. We need
+ // to remap the parameter indices of the AttributeSet.
+ AttributeSet NewAttrs = NewFunc->getAttributes();
+ NewFunc->copyAttributesFrom(OldFunc);
+ NewFunc->setAttributes(NewAttrs);
+
AttributeSet OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end();
- I != E; ++I)
- if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+ for (const Argument &OldArg : OldFunc->args())
+ if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
AttributeSet attrs =
- OldAttrs.getParamAttributes(I->getArgNo() + 1);
+ OldAttrs.getParamAttributes(OldArg.getArgNo() + 1);
if (attrs.getNumSlots() > 0)
- Anew->addAttr(attrs);
+ NewArg->addAttr(attrs);
}
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldAttrs.getRetAttributes()));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldAttrs.getFnAttributes()));
+ NewFunc->setAttributes(
+ NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex,
+ OldAttrs.getRetAttributes())
+ .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
+ OldAttrs.getFnAttributes()));
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
@@ -151,6 +154,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
+// Find the MDNode which corresponds to the DISubprogram data that described F.
+static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
+ for (DISubprogram Subprogram : Finder.subprograms()) {
+ if (Subprogram.describes(F)) return Subprogram;
+ }
+ return NULL;
+}
+
+// Add an operand to an existing MDNode. The new operand will be added at the
+// back of the operand list.
+static void AddOperand(MDNode *Node, Value *Operand) {
+ SmallVector<Value*, 16> Operands;
+ for (unsigned i = 0; i < Node->getNumOperands(); i++) {
+ Operands.push_back(Node->getOperand(i));
+ }
+ Operands.push_back(Operand);
+ MDNode *NewNode = MDNode::get(Node->getContext(), Operands);
+ Node->replaceAllUsesWith(NewNode);
+}
+
+// Clone the module-level debug info associated with OldFunc. The cloned data
+// will point to NewFunc instead.
+static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap) {
+ DebugInfoFinder Finder;
+ Finder.processModule(*OldFunc->getParent());
+
+ const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
+ if (!OldSubprogramMDNode) return;
+
+ // Ensure that OldFunc appears in the map.
+ // (if it's already there it must point to NewFunc anyway)
+ VMap[OldFunc] = NewFunc;
+ DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap));
+
+ for (DICompileUnit CU : Finder.compile_units()) {
+ DIArray Subprograms(CU.getSubprograms());
+
+ // If the compile unit's function list contains the old function, it should
+ // also contain the new one.
+ for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
+ if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+ AddOperand(Subprograms, NewSubprogram);
+ }
+ }
+ }
+}
+
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
@@ -188,6 +239,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
VMap[I] = DestI++; // Add mapping to VMap
}
+ if (ModuleLevelChanges)
+ CloneDebugInfoMetadata(NewF, F, VMap);
+
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
return NewF;
@@ -205,17 +259,17 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const DataLayout *TD;
+ const DataLayout *DL;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
- const DataLayout *td)
+ const DataLayout *DL)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -272,7 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
- if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ if (Value *V = SimplifyInstruction(NewInst, DL)) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
@@ -368,7 +422,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *TD,
+ const DataLayout *DL,
Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -379,7 +433,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, TD);
+ NameSuffix, CodeInfo, DL);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -509,7 +563,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// node).
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN, TD);
+ recursivelySimplifyInstruction(PN, DL);
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
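A caller-side sketch of the entry point this hunk extends; cloneWithDebugInfo is a hypothetical wrapper, and CloneFunction's signature is as declared in this file:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

// Clone F within its own module. With ModuleLevelChanges=true, the new
// CloneDebugInfoMetadata path above also clones F's DISubprogram and
// registers it with the owning compile unit's subprogram list.
static Function *cloneWithDebugInfo(Function *F) {
  ValueToValueMapTy VMap;
  Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/true);
  // CloneFunction does not insert the copy into any module; do that here.
  F->getParent()->getFunctionList().push_back(NewF);
  return NewF;
}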
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 6f00864..b814842 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,20 +14,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -86,7 +86,7 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
}
#ifndef NDEBUG
- for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()),
+ for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
E = Result.end();
I != E; ++I)
for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I);
@@ -171,9 +171,8 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
if (definedInCaller(Blocks, *OI))
Inputs.insert(*OI);
- for (Value::use_iterator UI = II->use_begin(), UE = II->use_end();
- UI != UE; ++UI)
- if (!definedInRegion(Blocks, *UI)) {
+ for (User *U : II->users())
+ if (!definedInRegion(Blocks, U)) {
Outputs.insert(II);
break;
}
@@ -369,7 +368,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
} else
RewriteVal = AI++;
- std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
use != useE; ++use)
if (Instruction* inst = dyn_cast<Instruction>(*use))
@@ -389,7 +388,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Rewrite branches to basic blocks outside of the loop to new dummy blocks
// within the new function. This must be done before we lose track of which
// blocks were originally in the code region.
- std::vector<User*> Users(header->use_begin(), header->use_end());
+ std::vector<User*> Users(header->user_begin(), header->user_end());
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
@@ -405,13 +404,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
/// that uses the value within the basic block, and return the predecessor
/// block associated with that use, or return 0 if none is found.
static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
- for (Value::use_iterator UI = Used->use_begin(),
- UE = Used->use_end(); UI != UE; ++UI) {
- PHINode *P = dyn_cast<PHINode>(*UI);
+ for (Use &U : Used->uses()) {
+ PHINode *P = dyn_cast<PHINode>(U.getUser());
if (P && P->getParent() == BB)
- return P->getIncomingBlock(UI);
+ return P->getIncomingBlock(U);
}
-
+
return 0;
}
@@ -502,7 +500,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
- std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
for (unsigned u = 0, e = Users.size(); u != e; ++u) {
Instruction *inst = cast<Instruction>(Users[u]);
if (!Blocks.count(inst->getParent()))
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 0723b35..ac6926f 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
@@ -41,7 +41,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
+ Instruction *U = cast<Instruction>(I.user_back());
if (PHINode *PN = dyn_cast<PHINode>(U)) {
// If this is a PHI node, we can't insert a load of the value before the
// use. Instead insert the load in the predecessor block corresponding
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 1da226b..39c80f8 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -240,7 +240,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = CI->getPredicate();
- // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
BI->swapSuccessors();
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 5f0a563..e9ebc45 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -9,9 +9,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
@@ -35,9 +35,8 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C))
return false;
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
- ++UI)
- if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ for (const User *U : C->users())
+ if (const Constant *CU = dyn_cast<Constant>(U)) {
if (!isSafeToDestroyConstant(CU))
return false;
} else
@@ -47,10 +46,9 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
SmallPtrSet<const PHINode *, 16> &PhiUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ for (const Use &U : V->uses()) {
+ const User *UR = U.getUser();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
GS.HasNonInstructionUser = true;
// If the result of the constantexpr isn't pointer type, then we won't
@@ -60,7 +58,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (analyzeGlobalAux(CE, GS, PhiUsers))
return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
if (GS.AccessingFunction == 0)
@@ -150,13 +148,13 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return true;
GS.StoredType = GlobalStatus::Stored;
} else if (ImmutableCallSite C = I) {
- if (!C.isCallee(UI))
+ if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ } else if (const Constant *C = dyn_cast<Constant>(UR)) {
GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!isSafeToDestroyConstant(C))
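This file is one of several in the patch migrated from explicit use_iterator loops to the new range APIs. A minimal sketch of the distinction between the two ranges (the helper is illustrative only):

#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// users() walks the using Values themselves; uses() walks the Use edges,
// which is what you need when the operand position matters, as in the
// C.isCallee(&U) check above.
static unsigned countUses(const Value *V) {
  unsigned N = 0;
  for (const Use &U : V->uses()) {
    (void)U.getUser(); // the User sitting on the other end of this edge
    ++N;
  }
  return N;
}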
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index d021bce..86def3e 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -17,17 +17,17 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -144,7 +144,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
- LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -155,16 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
-
- // Append the clauses from the outer landing pad instruction into the inlined
- // landing pad instructions.
- for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
- E = InlinedLPads.end(); I != E; ++I) {
- LandingPadInst *InlinedLPad = *I;
- for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
- OuterIdx != OuterNum; ++OuterIdx)
- InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
- }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -172,22 +161,11 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
/// invokes. This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-///
-/// Returns true to indicate that the next block should be skipped.
-static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInliningInfo &Invoke) {
- LandingPadInst *LPI = Invoke.getLandingPadInst();
-
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
- if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
- unsigned NumClauses = LPI->getNumClauses();
- L->reserveClauses(NumClauses);
- for (unsigned i = 0; i != NumClauses; ++i)
- L->addClause(LPI->getClause(i));
- }
-
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
@@ -223,10 +201,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Update any PHI nodes in the exceptional block to indicate that there is
// now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
- return false;
+ return;
}
-
- return false;
}
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
@@ -252,13 +228,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
InlinedLPads.insert(II->getLandingPadInst());
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ unsigned OuterNum = OuterLPad->getNumClauses();
+ InlinedLPad->reserveClauses(OuterNum);
+ for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ if (OuterLPad->isCleanup())
+ InlinedLPad->setCleanup(true);
+ }
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
- if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
- // Honor a request to skip the next block.
- ++BB;
- continue;
- }
+ HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);
// Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
@@ -357,7 +343,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
- IFI.TD) >= ByValAlignment)
+ IFI.DL) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -370,8 +356,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
- if (IFI.TD)
- Align = IFI.TD->getPrefTypeAlignment(AggTy);
+ if (IFI.DL)
+ Align = IFI.DL->getPrefTypeAlignment(AggTy);
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
@@ -391,11 +377,11 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
Value *Size;
- if (IFI.TD == 0)
+ if (IFI.DL == 0)
Size = ConstantExpr::getSizeOf(AggTy);
else
Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.TD->getTypeStoreSize(AggTy));
+ IFI.DL->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -415,9 +401,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
- ++UI) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ for (User *U : V->users()) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::lifetime_start:
@@ -437,11 +422,10 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return isUsedByLifetimeMarker(AI);
// Do a scan to find all the casts to i8*.
- for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
- ++I) {
- if (I->getType() != Int8PtrTy) continue;
- if (I->stripPointerCasts() != AI) continue;
- if (isUsedByLifetimeMarker(*I))
+ for (User *U : AI->users()) {
+ if (U->getType() != Int8PtrTy) continue;
+ if (U->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(U))
return true;
}
return false;
@@ -613,7 +597,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.TD, TheCall);
+ &InlinedFunctionInfo, IFI.DL, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -683,9 +667,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ConstantInt *AllocaSize = 0;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.TD) {
+ if (IFI.DL) {
Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
// Check that array size doesn't saturate uint64_t and doesn't
@@ -922,7 +906,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
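A caller-side sketch of InlineFunction, whose internals change in this series of hunks; tryInline is a hypothetical wrapper, and both the CallGraph and DataLayout may be null:

#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

// Attempt to inline the callee of CS. Note the TD -> DL rename in this
// patch: InlineFunctionInfo now carries the DataLayout as IFI.DL.
static bool tryInline(CallSite CS, CallGraph *CG, const DataLayout *DL) {
  InlineFunctionInfo IFI(CG, DL);
  return InlineFunction(CS, IFI, /*InsertLifetime=*/true);
}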
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index a020bc7..da890a2 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -27,11 +27,11 @@ namespace {
initializeInstNamerPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &Info) const {
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
Info.setPreservesAll();
}
- bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
AI != AE; ++AI)
if (!AI->hasName() && !AI->getType()->isVoidTy())
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 3cb8ded..e73a543 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains an implementation of 32bit scalar integer division for
-// targets that don't have native support. It's largely derived from
-// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
-// amount of control flow
+// This file contains an implementation of 32-bit and 64-bit scalar integer
+// division for targets that don't have native support. It's largely derived
+// from compiler-rt's implementations of __udivsi3 and __udivmoddi4,
+// but hand-tuned for targets that prefer less control flow.
//
//===----------------------------------------------------------------------===//
@@ -20,6 +20,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include <utility>
using namespace llvm;
@@ -31,7 +32,18 @@ using namespace llvm;
/// be expanded if the user wishes
static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
- ConstantInt *ThirtyOne = Builder.getInt32(31);
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+  // The following instructions are generated for both i32 (shift 31) and
+  // i64 (shift 63).
// ; %dividend_sgn = ashr i32 %dividend, 31
// ; %divisor_sgn = ashr i32 %divisor, 31
@@ -42,8 +54,8 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
// ; %urem = urem i32 %dividend, %divisor
// ; %xored = xor i32 %urem, %dividend_sgn
// ; %srem = sub i32 %xored, %dividend_sgn
- Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
- Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DividendSign = Builder.CreateAShr(Dividend, Shift);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, Shift);
Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
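
The xor/sub pairs above are a branch-free absolute value followed by a branch-free sign fixup; the remainder takes the dividend's sign. The identity can be checked in plain C++ (a standalone sketch, assuming arithmetic right shift for signed types, which LLVM's ashr guarantees but C++ only promises on mainstream targets):

    #include <cassert>
    #include <cstdint>

    static int64_t sremViaUrem(int64_t A, int64_t B) {
      uint64_t SignA = static_cast<uint64_t>(A >> 63); // 0 or ~0
      uint64_t SignB = static_cast<uint64_t>(B >> 63);
      // Unsigned arithmetic keeps the INT64_MIN case well defined.
      uint64_t UA = (static_cast<uint64_t>(A) ^ SignA) - SignA; // |A|
      uint64_t UB = (static_cast<uint64_t>(B) ^ SignB) - SignB; // |B|
      uint64_t URem = UA % UB;                      // the unsigned remainder
      return static_cast<int64_t>((URem ^ SignA) - SignA); // reapply sign
    }

    int main() {
      assert(sremViaUrem(-7, 3) == -7 % 3); // -1
      assert(sremViaUrem(7, -3) == 7 % -3); //  1
    }
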
@@ -68,6 +80,8 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// Remainder = Dividend - Quotient*Divisor
+  // The following instructions are generated for both i32 and i64.
+
// ; %quotient = udiv i32 %dividend, %divisor
// ; %product = mul i32 %divisor, %quotient
// ; %remainder = sub i32 %dividend, %product
@@ -88,9 +102,20 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
/// present, i.e. not folded), ready to be expanded if the user wishes.
static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
- // Implementation taken from compiler-rt's __divsi3
+ // Implementation taken from compiler-rt's __divsi3 and __divdi3
- ConstantInt *ThirtyOne = Builder.getInt32(31);
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+  // The following instructions are generated for both i32 (shift 31) and
+  // i64 (shift 63).
// ; %tmp = ashr i32 %dividend, 31
// ; %tmp1 = ashr i32 %divisor, 31
@@ -102,8 +127,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
// ; %tmp4 = xor i32 %q_mag, %q_sgn
// ; %q = sub i32 %tmp4, %q_sgn
- Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne);
- Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *Tmp = Builder.CreateAShr(Dividend, Shift);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, Shift);
Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
@@ -119,9 +144,9 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
return Q;
}
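
The only difference from the remainder expansion is the sign of the result: the quotient is negative exactly when the operand signs differ, so %q_sgn is the xor of the two sign masks. A quick standalone check of that identity with example values (not the LLVM code itself):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t A = -7, B = 3; // -7 / 3 == -2; both C++ and LLVM truncate
      uint64_t SignA = static_cast<uint64_t>(A >> 63);
      uint64_t SignB = static_cast<uint64_t>(B >> 63);
      uint64_t SignQ = SignA ^ SignB; // quotient sign mask
      uint64_t UA = (static_cast<uint64_t>(A) ^ SignA) - SignA;
      uint64_t UB = (static_cast<uint64_t>(B) ^ SignB) - SignB;
      uint64_t UQ = UA / UB;          // magnitude of the quotient
      assert(static_cast<int64_t>((UQ ^ SignQ) - SignQ) == A / B);
    }
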
-/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
-/// quotient, rounded towards 0. Builder's insert point should be pointing where
-/// the caller wants code generated, e.g. at the udiv instruction.
+/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers.
+/// Returns the quotient, rounded towards 0. Builder's insert point should
+/// point where the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// The basic algorithm can be found in the compiler-rt project's
@@ -129,18 +154,33 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// that's been hand-tuned to lessen the amount of control flow involved.
// Some helper values
- IntegerType *I32Ty = Builder.getInt32Ty();
+ IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
+ unsigned BitWidth = DivTy->getBitWidth();
+
+ ConstantInt *Zero;
+ ConstantInt *One;
+ ConstantInt *NegOne;
+ ConstantInt *MSB;
+
+ if (BitWidth == 64) {
+ Zero = Builder.getInt64(0);
+ One = Builder.getInt64(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Zero = Builder.getInt32(0);
+ One = Builder.getInt32(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt32(31);
+ }
- ConstantInt *Zero = Builder.getInt32(0);
- ConstantInt *One = Builder.getInt32(1);
- ConstantInt *ThirtyOne = Builder.getInt32(31);
- ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
- ConstantInt *True = Builder.getTrue();
+ ConstantInt *True = Builder.getTrue();
BasicBlock *IBB = Builder.GetInsertBlock();
Function *F = IBB->getParent();
- Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
- I32Ty);
+ Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ DivTy);
// Our CFG is going to look like:
// +---------------------+
@@ -190,6 +230,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// We'll be overwriting the terminator to insert our extra blocks
SpecialCases->getTerminator()->eraseFromParent();
+  // The same instructions are generated for both i32 (msb 31) and i64 (msb 63).
+
// First off, check for special cases: dividend or divisor is zero, divisor
// is greater than dividend, and divisor is 1.
// ; special-cases:
@@ -209,12 +251,12 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
- Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
- Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True);
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
- Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
- Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
Builder.CreateCondBr(EarlyRet, End, BB1);
@@ -227,7 +269,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; br i1 %skipLoop, label %loop-exit, label %preheader
Builder.SetInsertPoint(BB1);
Value *SR_1 = Builder.CreateAdd(SR, One);
- Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Tmp2 = Builder.CreateSub(MSB, SR);
Value *Q = Builder.CreateShl(Dividend, Tmp2);
Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
@@ -260,17 +302,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %tmp12 = icmp eq i32 %sr_2, 0
// ; br i1 %tmp12, label %loop-exit, label %do-while
Builder.SetInsertPoint(DoWhile);
- PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
- PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
- PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
- PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *SR_3 = Builder.CreatePHI(DivTy, 2);
+ PHINode *R_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_2 = Builder.CreatePHI(DivTy, 2);
Value *Tmp5 = Builder.CreateShl(R_1, One);
- Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, MSB);
Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
Value *Tmp8 = Builder.CreateShl(Q_2, One);
Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
- Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB);
Value *Carry = Builder.CreateAnd(Tmp10, One);
Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
Value *R = Builder.CreateSub(Tmp7, Tmp11);
@@ -285,8 +327,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_4 = or i32 %carry_2, %tmp13
// ; br label %end
Builder.SetInsertPoint(LoopExit);
- PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
- PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_3 = Builder.CreatePHI(DivTy, 2);
Value *Tmp13 = Builder.CreateShl(Q_3, One);
Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
Builder.CreateBr(End);
@@ -295,7 +337,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
// ; ret i32 %q_5
Builder.SetInsertPoint(End, End->begin());
- PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_5 = Builder.CreatePHI(DivTy, 2);
// Populate the Phis, since all values have now been created. Our Phis were:
// ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
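
Stripped of the PHI plumbing, the do-while block above is one step of compiler-rt-style shift-subtract division: shift the next dividend bit into the running remainder, shift the previous iteration's carry into the quotient, and subtract the divisor under a sign mask instead of a branch. A plain C++ rendering of the whole algorithm, with a GCC/Clang builtin standing in for the ctlz intrinsic and assuming arithmetic right shift (a sketch for illustration, not the emitted IR):

    #include <cassert>
    #include <cstdint>

    static uint64_t udivShiftSubtract(uint64_t Dividend, uint64_t Divisor) {
      // The special-cases block: zero operands or an oversized divisor.
      if (Divisor == 0 || Dividend == 0 || Divisor > Dividend)
        return 0;
      int SR = __builtin_clzll(Divisor) - __builtin_clzll(Dividend);
      if (SR == 63) // divisor is 1 and the dividend's top bit is set
        return Dividend;
      uint64_t Q = Dividend << (63 - SR); // low bits, fed in from the top
      uint64_t R = Dividend >> (SR + 1);  // running remainder (high bits)
      uint64_t Carry = 0;
      for (int i = 0; i <= SR; ++i) {     // SR + 1 trips, like the do-while
        R = (R << 1) | (Q >> 63);         // next dividend bit into R
        Q = (Q << 1) | Carry;             // last trip's quotient bit into Q
        // Mask is ~0 when R >= Divisor, else 0: the branch-free subtract.
        uint64_t Mask = static_cast<uint64_t>(
            static_cast<int64_t>(Divisor - R - 1) >> 63);
        Carry = Mask & 1;
        R -= Mask & Divisor;
      }
      return (Q << 1) | Carry; // the loop-exit block's final shift/or
    }

    int main() {
      assert(udivShiftSubtract(7, 3) == 7 / 3);
      assert(udivShiftSubtract(1, 1) == 1);
      assert(udivShiftSubtract(~0ull, 10) == ~0ull / 10);
    }
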
@@ -326,9 +368,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
/// Generate code to calculate the remainder of two integers, replacing Rem with
/// the generated code. This currently generates code using the udiv expansion,
/// but future work includes generating more specialized code, e.g. when more
-/// information about the operands are known. Currently only implements 32bit
-/// scalar division (due to udiv's limitation), but future work is removing this
-/// limitation.
+/// information about the operands is known. Implements both 32-bit and
+/// 64-bit scalar division.
///
/// @brief Replace Rem with generated code.
bool llvm::expandRemainder(BinaryOperator *Rem) {
@@ -338,6 +379,15 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
IRBuilder<> Builder(Rem);
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+    llvm_unreachable("Rem over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth != 32 && RemTyBitWidth != 64)
+    llvm_unreachable("Rem of bitwidth other than 32 or 64 not supported");
+
// First prepare the sign if it's a signed remainder
if (Rem->getOpcode() == Instruction::SRem) {
Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
@@ -376,9 +426,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
-/// when more information about the operands are known. Currently only
-/// implements 32bit scalar division, but future work is removing this
-/// limitation.
+/// when more information about the operands is known. Implements both
+/// 32-bit and 64-bit scalar division.
///
/// @brief Replace Div with generated code.
bool llvm::expandDivision(BinaryOperator *Div) {
@@ -388,9 +437,15 @@ bool llvm::expandDivision(BinaryOperator *Div) {
IRBuilder<> Builder(Div);
- if (Div->getType()->isVectorTy())
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
llvm_unreachable("Div over vectors not supported");
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth != 32 && DivTyBitWidth != 64)
+ llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+
// First prepare the sign if it's a signed division
if (Div->getOpcode() == Instruction::SDiv) {
// Lower the code to unsigned division, and reset Div to point to the udiv.
@@ -443,7 +498,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
if (RemTyBitWidth == 32)
return expandRemainder(Rem);
- // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // If bitwidth smaller than 32 extend inputs, truncate the result and proceed
// with 32 bit division.
IRBuilder<> Builder(Rem);
@@ -471,6 +526,55 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
return expandRemainder(cast<BinaryOperator>(ExtRem));
}
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 64 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 64 bits.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+    llvm_unreachable("Rem over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth > 64)
+    llvm_unreachable("Rem of bitwidth greater than 64 not supported");
+
+ if (RemTyBitWidth == 64)
+ return expandRemainder(Rem);
+
+  // If bitwidth smaller than 64 extend inputs, truncate the result and
+  // proceed with 64 bit division.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
/// above routines and extends the inputs/truncates the outputs to operate
@@ -495,7 +599,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
if (DivTyBitWidth == 32)
return expandDivision(Div);
- // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // If bitwidth smaller than 32 extend inputs, truncate the result and proceed
// with 32 bit division.
IRBuilder<> Builder(Div);
@@ -522,3 +626,53 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
return expandDivision(cast<BinaryOperator>(ExtDiv));
}
+
+/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 64 bits.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth > 64)
+ llvm_unreachable("Div of bitwidth greater than 64 not supported");
+
+ if (DivTyBitWidth == 64)
+ return expandDivision(Div);
+
+  // If bitwidth smaller than 64 extend inputs, truncate the result and
+  // proceed with 64 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
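
Nothing in this patch shows a caller of the new 64-bit entry points; a hypothetical lowering helper might drive them as below (the function name and the scan-then-expand split are illustrative assumptions; the expanders erase the original instruction and restructure the CFG, hence the two phases):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/IntegerDivision.h"

    using namespace llvm;

    static bool expandAllDivisions(Function &F) {
      SmallVector<BinaryOperator *, 8> Divs;
      for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
        for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
          if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&*I))
            if ((BO->getOpcode() == Instruction::UDiv ||
                 BO->getOpcode() == Instruction::SDiv) &&
                BO->getType()->isIntegerTy() &&
                BO->getType()->getIntegerBitWidth() <= 64)
              Divs.push_back(BO);

      // Expand after the scan: each call replaces BO with new control flow.
      for (unsigned i = 0, e = Divs.size(); i != e; ++i)
        expandDivisionUpTo64Bits(Divs[i]);
      return !Divs.empty();
    }
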
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index f15e8d5..d538175 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -31,216 +31,103 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
-#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
-namespace {
- struct LCSSA : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(ID) {
- initializeLCSSAPass(*PassRegistry::getPassRegistry());
- }
-
- // Cached analysis information for the current function.
- DominatorTree *DT;
- LoopInfo *LI;
- ScalarEvolution *SE;
- PredIteratorCache PredCache;
- Loop *L;
-
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG. It maintains both of these,
- /// as well as the CFG. It also requires dominator information.
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-
- AU.addRequired<DominatorTree>();
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<ScalarEvolution>();
- }
- private:
- bool ProcessInstruction(Instruction *Inst,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks);
-
- /// verifyAnalysis() - Verify loop nest.
- virtual void verifyAnalysis() const {
- // Check the special guarantees that LCSSA makes.
- assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
- }
- };
-}
-
-char LCSSA::ID = 0;
-INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
-
-Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-char &llvm::LCSSAID = LCSSA::ID;
-
-
-/// BlockDominatesAnExit - Return true if the specified block dominates at least
-/// one of the blocks in the specified list.
-static bool BlockDominatesAnExit(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks,
- DominatorTree *DT) {
- DomTreeNode *DomNode = DT->getNode(BB);
+/// Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ if (ExitBlocks[i] == BB)
return true;
-
return false;
}
+/// Given an instruction in the loop, check to see if it has any uses that are
+/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the
+/// uses.
+static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ PredIteratorCache &PredCache) {
+ SmallVector<Use *, 16> UsesToRewrite;
-/// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
- L = TheLoop;
-
- DT = &getAnalysis<DominatorTree>();
- LI = &getAnalysis<LoopInfo>();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ BasicBlock *InstBB = Inst.getParent();
- // Get the set of exiting blocks.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- if (ExitBlocks.empty())
- return false;
-
- // Look at all the instructions in the loop, checking to see if they have uses
- // outside the loop. If so, rewrite those uses.
- bool MadeChange = false;
-
- for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
- BBI != E; ++BBI) {
- BasicBlock *BB = *BBI;
-
- // For large loops, avoid use-scanning by using dominance information: In
- // particular, if a block does not dominate any of the loop exits, then none
- // of the values defined in the block could be used outside the loop.
- if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
- continue;
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- // Reject two common cases fast: instructions with no uses (like stores)
- // and instructions with one use that is in the same block as this.
- if (I->use_empty() ||
- (I->hasOneUse() && I->use_back()->getParent() == BB &&
- !isa<PHINode>(I->use_back())))
- continue;
-
- MadeChange |= ProcessInstruction(I, ExitBlocks);
- }
- }
-
- // If we modified the code, remove any caches about the loop from SCEV to
- // avoid dangling entries.
- // FIXME: This is a big hammer, can we clear the cache more selectively?
- if (SE && MadeChange)
- SE->forgetLoop(L);
-
- assert(L->isLCSSAForm(*DT));
- PredCache.clear();
-
- return MadeChange;
-}
-
-/// isExitBlock - Return true if the specified block is in the list.
-static bool isExitBlock(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] == BB)
- return true;
- return false;
-}
+ for (Use &U : Inst.uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(U);
-/// ProcessInstruction - Given an instruction in the loop, check to see if it
-/// has any uses that are outside the current loop. If so, insert LCSSA PHI
-/// nodes and rewrite the uses.
-bool LCSSA::ProcessInstruction(Instruction *Inst,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
- SmallVector<Use*, 16> UsesToRewrite;
-
- BasicBlock *InstBB = Inst->getParent();
-
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- BasicBlock *UserBB = cast<Instruction>(U)->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(U))
- UserBB = PN->getIncomingBlock(UI);
-
- if (InstBB != UserBB && !L->contains(UserBB))
- UsesToRewrite.push_back(&UI.getUse());
+ if (InstBB != UserBB && !L.contains(UserBB))
+ UsesToRewrite.push_back(&U);
}
// If there are no uses outside the loop, exit with no change.
- if (UsesToRewrite.empty()) return false;
-
+ if (UsesToRewrite.empty())
+ return false;
+
++NumLCSSA; // We are applying the transformation
// Invoke instructions are special in that their result value is not available
- // along their unwind edge. The code below tests to see whether DomBB dominates
+ // along their unwind edge. The code below tests to see whether DomBB
+ // dominates
// the value, so adjust DomBB to the normal destination block, which is
// effectively where the value is first usable.
- BasicBlock *DomBB = Inst->getParent();
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ BasicBlock *DomBB = Inst.getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
DomBB = Inv->getNormalDest();
- DomTreeNode *DomNode = DT->getNode(DomBB);
+ DomTreeNode *DomNode = DT.getNode(DomBB);
- SmallVector<PHINode*, 16> AddedPHIs;
+ SmallVector<PHINode *, 16> AddedPHIs;
SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst->getType(), Inst->getName());
-
+ SSAUpdate.Initialize(Inst.getType(), Inst.getName());
+
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
- for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
- BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end();
+ BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
- if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
-
+ if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+ continue;
+
// If we already inserted something for this BB, don't reprocess it.
- if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
-
- PHINode *PN = PHINode::Create(Inst->getType(),
- PredCache.GetNumPreds(ExitBB),
- Inst->getName()+".lcssa",
- ExitBB->begin());
+ if (SSAUpdate.HasValueForBlock(ExitBB))
+ continue;
+
+ PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB),
+ Inst.getName() + ".lcssa", ExitBB->begin());
// Add inputs from inside the loop for this PHI.
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
- PN->addIncoming(Inst, *PI);
+ PN->addIncoming(&Inst, *PI);
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!L->contains(*PI))
+ if (!L.contains(*PI))
UsesToRewrite.push_back(
- &PN->getOperandUse(
- PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+ PN->getNumIncomingValues() - 1)));
}
AddedPHIs.push_back(PN);
-
+
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
}
@@ -257,15 +144,14 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
- if (isa<PHINode>(UserBB->begin()) &&
- isExitBlock(UserBB, ExitBlocks)) {
+ if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
if (UsesToRewrite[i]->get()->hasValueHandle())
ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
UsesToRewrite[i]->set(UserBB->begin());
continue;
}
-
+
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
@@ -275,7 +161,154 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (AddedPHIs[i]->use_empty())
AddedPHIs[i]->eraseFromParent();
}
-
+
return true;
}
+/// Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool
+blockDominatesAnExit(BasicBlock *BB,
+ DominatorTree &DT,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+ DomTreeNode *DomNode = DT.getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
+ bool Changed = false;
+
+  // Get the set of exit blocks.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ PredIteratorCache PredCache;
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end();
+ BBI != BBE; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!blockDominatesAnExit(BB, DT, ExitBlocks))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->user_back()->getParent() == BB &&
+ !isa<PHINode>(I->user_back())))
+ continue;
+
+ Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache);
+ }
+ }
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && Changed)
+ SE->forgetLoop(&L);
+
+ assert(L.isLCSSAForm(DT));
+
+ return Changed;
+}
+
+/// Process a loop nest depth first.
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Recurse depth-first through inner loops.
+ for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ Changed |= formLCSSARecursively(**LI, DT, SE);
+
+ Changed |= formLCSSA(L, DT, SE);
+ return Changed;
+}
+
+namespace {
+struct LCSSA : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : FunctionPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+private:
+ bool processLoop(Loop &L);
+
+ void verifyAnalysis() const override;
+};
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// Process all loops in the function, inner-most out.
+bool LCSSA::runOnFunction(Function &F) {
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+  // Put each loop nest in the function into LCSSA form.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= formLCSSARecursively(**I, *DT, SE);
+
+ return Changed;
+}
+
+static void verifyLoop(Loop &L, DominatorTree &DT) {
+ // Recurse depth-first through inner loops.
+ for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ verifyLoop(**LI, DT);
+
+ // Check the special guarantees that LCSSA makes.
+ //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!");
+}
+
+void LCSSA::verifyAnalysis() const {
+ // Verify each loop nest in the function, assuming LI still points at that
+ // function's loop info.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(**I, *DT);
+}
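
With the logic pulled out of the pass, loop transforms can now re-establish LCSSA directly instead of scheduling the pass. A minimal sketch of such a caller, assuming the analyses are already in hand (the wrapper name is hypothetical; ScalarEvolution may be null when unavailable):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"

    using namespace llvm;

    // Restore loop-closed SSA for a whole nest after a transform that may
    // have created uses of loop values outside the loop.
    static bool restoreLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
      return formLCSSARecursively(L, DT, SE);
    }
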
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 2768041..9d0be8b 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -17,15 +17,17 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -35,11 +37,9 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -127,8 +127,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// dest. If so, eliminate it as an explicit compare.
if (i.getCaseSuccessor() == DefaultDest) {
MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
- // MD should have 2 + NumCases operands.
- if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ unsigned NCases = SI->getNumCases();
+ // Fold the case metadata into the default if there will be any branches
+ // left, unless the metadata doesn't match the switch.
+ if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) {
// Collect branch weights into a vector.
SmallVector<uint32_t, 8> Weights;
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
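
Concretely, branch_weights metadata is laid out as (tag, default weight, one weight per case), and deleting a case that already targets the default destination folds that case's weight into the default slot. A worked sketch of the arithmetic with made-up numbers (not the LLVM code itself):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Weights for: default, case 0, case 1 (the string tag is omitted).
      std::vector<uint32_t> Weights = {10, 5, 7};
      unsigned Removed = 0; // case 0 branches to the default destination
      Weights[0] += Weights[Removed + 1]; // fold its weight into the default
      Weights.erase(Weights.begin() + Removed + 1);
      assert(Weights[0] == 15 && Weights[1] == 7);
    }
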
@@ -352,8 +354,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
/// true when there are no uses or multiple uses that all refer to the same
/// value.
static bool areAllUsesEqual(Instruction *I) {
- Value::use_iterator UI = I->use_begin();
- Value::use_iterator UE = I->use_end();
+ Value::user_iterator UI = I->user_begin();
+ Value::user_iterator UE = I->user_end();
if (UI == UE)
return true;
@@ -374,7 +376,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
const TargetLibraryInfo *TLI) {
SmallPtrSet<Instruction*, 4> Visited;
for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
- I = cast<Instruction>(*I->use_begin())) {
+ I = cast<Instruction>(*I->user_begin())) {
if (I->use_empty())
return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
@@ -506,11 +508,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
if (P) {
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
+ BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock();
+ DT.changeImmediateDominator(DestBB, PredBBIDom);
+ DT.eraseNode(PredBB);
}
}
// Nuke BB.
@@ -749,10 +752,9 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
if (!Succ->getSinglePredecessor()) {
BasicBlock::iterator BBI = BB->begin();
while (isa<PHINode>(*BBI)) {
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
- if (PN->getIncomingBlock(UI) != BB)
+ for (Use &U : BBI->uses()) {
+ if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) {
+ if (PN->getIncomingBlock(U) != BB)
return false;
} else {
return false;
@@ -1034,17 +1036,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
bool llvm::LowerDbgDeclare(Function &F) {
DIBuilder DIB(*F.getParent());
SmallVector<DbgDeclareInst *, 4> Dbgs;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ for (auto &FI : F)
+ for (BasicBlock::iterator BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(BI))
Dbgs.push_back(DDI);
- }
+
if (Dbgs.empty())
return false;
- for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
- E = Dbgs.end(); I != E; ++I) {
- DbgDeclareInst *DDI = *I;
+ for (auto &I : Dbgs) {
+ DbgDeclareInst *DDI = I;
AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
// If this is an alloca for a scalar variable, insert a dbg.value
// at each load and store to the alloca and erase the dbg.declare.
@@ -1053,11 +1054,10 @@ bool llvm::LowerDbgDeclare(Function &F) {
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ else if (LoadInst *LI = dyn_cast<LoadInst>(U))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
else
RemoveDDI = false;
@@ -1072,9 +1072,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
/// alloca 'V', if any.
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
- for (Value::use_iterator UI = DebugNode->use_begin(),
- E = DebugNode->use_end(); UI != E; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
return DDI;
return 0;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 6d5f16c..47083ea 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -42,20 +42,21 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -65,303 +66,41 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
-namespace {
- struct LoopSimplify : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(ID) {
- initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
- }
-
- // AA - If we have an alias analysis object to update, this is it, otherwise
- // this is null.
- AliasAnalysis *AA;
- LoopInfo *LI;
- DominatorTree *DT;
- ScalarEvolution *SE;
- Loop *L;
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // We need loop information to identify the loops...
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
-
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
-
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DependenceAnalysis>();
- AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- }
-
- /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
- void verifyAnalysis() const;
-
- private:
- bool ProcessLoop(Loop *L, LPPassManager &LPM);
- BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
- BasicBlock *Preheader);
- BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
- };
-}
-
-static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
-
-char LoopSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
-
-// Publicly exposed interface to pass...
-char &llvm::LoopSimplifyID = LoopSimplify::ID;
-Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-
-/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
-/// it in any convenient order) inserting preheaders...
-///
-bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
- L = l;
- bool Changed = false;
- LI = &getAnalysis<LoopInfo>();
- AA = getAnalysisIfAvailable<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
-
- Changed |= ProcessLoop(L, LPM);
-
- return Changed;
-}
-
-/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
-/// all loops have preheaders.
-///
-bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
- bool Changed = false;
-ReprocessLoop:
-
- // Check to see that no blocks (other than the header) in this loop have
- // predecessors that are not in the loop. This is not valid for natural
- // loops, but can occur if the blocks are unreachable. Since they are
- // unreachable we can just shamelessly delete those CFG edges!
- for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
- BB != E; ++BB) {
- if (*BB == L->getHeader()) continue;
-
- SmallPtrSet<BasicBlock*, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB),
- PE = pred_end(*BB); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P))
- BadPreds.insert(P);
- }
-
- // Delete each unique out-of-loop (and thus dead) predecessor.
- for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
- E = BadPreds.end(); I != E; ++I) {
-
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << (*I)->getName() << "\n");
-
- // Inform each successor of each dead pred.
- for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
- (*SI)->removePredecessor(*I);
- // Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = (*I)->getTerminator();
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- (*I)->getTerminator()->eraseFromParent();
- new UnreachableInst((*I)->getContext(), *I);
- Changed = true;
- }
- }
-
- // If there are exiting blocks with branches on undef, resolve the undef in
- // the direction which will exit the loop. This will help simplify loop
- // trip count computations.
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- E = ExitingBlocks.end(); I != E; ++I)
- if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
- if (BI->isConditional()) {
- if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
-
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << (*I)->getName() << "\n");
-
- BI->setCondition(ConstantInt::get(Cond->getType(),
- !L->contains(BI->getSuccessor(0))));
-
- // This may make the loop analyzable, force SCEV recomputation.
- if (SE)
- SE->forgetLoop(L);
-
- Changed = true;
- }
- }
-
- // Does the loop already have a preheader? If so, don't insert one.
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, this);
- if (Preheader) {
- ++NumInserted;
- Changed = true;
- }
- }
-
- // Next, check to make sure that all exit nodes of the loop only have
- // predecessors that are inside of the loop. This check guarantees that the
- // loop preheader/header will dominate the exit blocks. If the exit block has
- // predecessors from outside of the loop, split the edge now.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
- E = ExitBlockSet.end(); I != E; ++I) {
- BasicBlock *ExitBlock = *I;
- for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
- PI != PE; ++PI)
- // Must be exactly this loop: no subloops, parent loops, or non-loop preds
- // allowed.
- if (!L->contains(*PI)) {
- if (RewriteLoopExitBlock(L, ExitBlock)) {
- ++NumInserted;
- Changed = true;
- }
- break;
- }
- }
-
- // If the header has more than two predecessors at this point (from the
- // preheader and from multiple backedges), we must adjust the loop.
- BasicBlock *LoopLatch = L->getLoopLatch();
- if (!LoopLatch) {
- // If this is really a nested loop, rip it out into a child loop. Don't do
- // this for loops with a giant number of backedges, just factor them into a
- // common backedge instead.
- if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM, Preheader)) {
- ++NumNested;
- // This is a big restructuring change, reprocess the whole loop.
- Changed = true;
- // GCC doesn't tail recursion eliminate this.
- goto ReprocessLoop;
- }
- }
-
- // If we either couldn't, or didn't want to, identify nesting of the loops,
- // insert a new block that all backedges target, then make it jump to the
- // loop header.
- LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
- if (LoopLatch) {
- ++NumInserted;
- Changed = true;
- }
+// If the new block isn't already well placed, move it right after some
+// 'outside block'. This prevents the preheader from being placed inside the
+// loop body, e.g. when the loop hasn't been rotated.
+static void placeSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock *> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
}
- // Scan over the PHI nodes in the loop header. Since they now have only two
- // incoming values (the loop is canonicalized), we may have simplified the PHI
- // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
- PHINode *PN;
- for (BasicBlock::iterator I = L->getHeader()->begin();
- (PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
- if (AA) AA->deleteValue(PN);
- if (SE) SE->forgetValue(PN);
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- }
-
- // If this loop has multiple exits and the exits all go to the same
- // block, attempt to merge the exits. This helps several passes, such
- // as LoopRotation, which do not support loops with multiple exits.
- // SimplifyCFG also does this (and this code uses the same utility
- // function), however this code is loop-aware, where SimplifyCFG is
- // not. That gives it the advantage of being able to hoist
- // loop-invariant instructions out of the way to open up more
- // opportunities, and the disadvantage of having the responsibility
- // to preserve dominator information.
- bool UniqueExit = true;
- if (!ExitBlocks.empty())
- for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] != ExitBlocks[0]) {
- UniqueExit = false;
- break;
- }
- if (UniqueExit) {
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
- if (!ExitingBlock->getSinglePredecessor()) continue;
- BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!BI || !BI->isConditional()) continue;
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
- if (!CI || CI->getParent() != ExitingBlock) continue;
-
- // Attempt to hoist out all instructions except for the
- // comparison and the branch.
- bool AllInvariant = true;
- for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
- Instruction *Inst = I++;
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
- if (Inst == CI)
- continue;
- if (!L->makeLoopInvariant(Inst, Changed,
- Preheader ? Preheader->getTerminator() : 0)) {
- AllInvariant = false;
- break;
- }
- }
- if (!AllInvariant) continue;
-
- // The block has now been cleared of all instructions except for
- // a comparison and a conditional branch. SimplifyCFG may be able
- // to fold it now.
- if (!FoldBranchToCommonDest(BI)) continue;
-
- // Success. The block is now dead, so remove it from the loop,
- // update the dominator tree and delete it.
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
- << ExitingBlock->getName() << "\n");
-
- // If any reachable control flow within this loop has changed, notify
- // ScalarEvolution. Currently assume the parent loop doesn't change
- // (spliting edges doesn't count). If blocks, CFG edges, or other values
- // in the parent loop change, then we need call to forgetLoop() for the
- // parent instead.
- if (SE)
- SE->forgetLoop(L);
-
- assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
- Changed = true;
- LI->removeBlock(ExitingBlock);
-
- DomTreeNode *Node = DT->getNode(ExitingBlock);
- const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
- Node->getChildren();
- while (!Children.empty()) {
- DomTreeNode *Child = Children.front();
- DT->changeImmediateDominator(Child, Node->getIDom());
- }
- DT->eraseNode(ExitingBlock);
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
- BI->getSuccessor(0)->removePredecessor(ExitingBlock);
- BI->getSuccessor(1)->removePredecessor(ExitingBlock);
- ExitingBlock->eraseFromParent();
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
}
}
- return Changed;
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
}
/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
@@ -406,15 +145,16 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+ placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
return PreheaderBB;
}
-/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
-/// blocks. This method is used to split exit blocks that have predecessors
-/// outside of the loop.
-BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+/// \brief Ensure that the loop preheader dominates all exit blocks.
+///
+/// This method is used to split exit blocks that have predecessors outside of
+/// the loop.
+static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -434,10 +174,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
LoopBlocks.size()),
".loopexit", ".nonloopexit",
- this, NewBBs);
+ PP, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -445,29 +185,29 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
return NewExitBB;
}
-/// AddBlockAndPredsToSet - Add the specified block, and all of its
-/// predecessors, to the specified set, if it's not already in there. Stop
-/// predecessor traversal when we reach StopBlock.
-static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+/// Add the specified block, and all of its predecessors, to the specified set,
+/// if it's not already in there. Stop predecessor traversal when we reach
+/// StopBlock.
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
std::set<BasicBlock*> &Blocks) {
- std::vector<BasicBlock *> WorkList;
- WorkList.push_back(InputBB);
+ SmallVector<BasicBlock *, 8> Worklist;
+ Worklist.push_back(InputBB);
do {
- BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ BasicBlock *BB = Worklist.pop_back_val();
if (Blocks.insert(BB).second && BB != StopBlock)
// If BB is not already processed and it is not a stop block then
// insert its predecessor in the work list
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *WBB = *I;
- WorkList.push_back(WBB);
+ Worklist.push_back(WBB);
}
- } while(!WorkList.empty());
+ } while (!Worklist.empty());
}
-/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
-/// PHI node that tells us how to partition the loops.
-static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
- AliasAnalysis *AA, LoopInfo *LI) {
+/// \brief The first part of loop-nestification is to find a PHI node that tells
+/// us how to partition the loops.
+static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
+ DominatorTree *DT) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
@@ -489,46 +229,10 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
return 0;
}
-// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
-// right after some 'outside block' block. This prevents the preheader from
-// being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
- // Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- if (&*BBI == SplitPreds[i])
- return;
- }
-
- // If it isn't already after an outside block, move it after one. This is
- // always good as it makes the uncond branch from the outside block into a
- // fall-through.
-
- // Figure out *which* outside block to put this after. Prefer an outside
- // block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
- FoundBB = SplitPreds[i];
- break;
- }
- }
-
- // If our heuristic for a *good* bb to place this after doesn't find
- // anything, just pick something. It's likely better than leaving it within
- // the loop.
- if (!FoundBB)
- FoundBB = SplitPreds[0];
- NewBB->moveAfter(FoundBB);
-}
-
-
-/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
-/// them out into a nested loop. This is important for code that looks like
+/// \brief If this loop has multiple backedges, try to pull one of them out into
+/// a nested loop.
+///
+/// This is important for code that looks like
/// this:
///
/// Loop:
@@ -544,8 +248,9 @@ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
- BasicBlock *Preheader) {
+static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return 0;
@@ -554,7 +259,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
- PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
if (PN == 0) return 0; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -580,11 +285,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB =
- SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+ placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
// Create the new outer loop.
Loop *NewOuter = new Loop();
@@ -598,9 +303,6 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
// L is now a subloop of our outer loop.
NewOuter->addChildLoop(L);
- // Add the new loop to the pass manager queue.
- LPM.insertLoopIntoQueue(NewOuter);
-
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
NewOuter->addBlockEntry(*I);
@@ -615,7 +317,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
BasicBlock *P = *PI;
if (DT->dominates(Header, P))
- AddBlockAndPredsToSet(P, Header, BlocksInL);
+ addBlockAndPredsToSet(P, Header, BlocksInL);
}
// Scan all of the loop children of L, moving them to OuterLoop if they are
@@ -643,15 +345,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
return NewOuter;
}
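Concretely, the separation rewrites a header that has one backedge carrying a
varying value and another that leaves a PHI unchanged into a two-deep nest. A
hand-written sketch (the ".outer" suffix comes from the SplitBlockPredecessors
call above; the other names are illustrative):

    ; Before: %header has two backedges; %i does not vary on the edge
    ; from %inner.latch, so that edge belongs to an inner loop.
    header:
      %i = phi i32 [ 0, %entry ], [ %i.next, %outer.latch ], [ %i, %inner.latch ]

    ; After: the entry edge and the varying backedge are split out into a
    ; new outer-loop header, leaving %header with a single backedge.
    header.outer:
      %i.ph = phi i32 [ 0, %entry ], [ %i.next, %outer.latch ]
      br label %header
    header:
      %i = phi i32 [ %i.ph, %header.outer ], [ %i, %inner.latch ]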
-
-
-/// InsertUniqueBackedgeBlock - This method is called when the specified loop
-/// has more than one backedge in it. If this occurs, revector all of these
-/// backedges to target a new basic block and have that block branch to the loop
-/// header. This ensures that loops have exactly one backedge.
+/// \brief This method is called when the specified loop has more than one
+/// backedge in it.
///
-BasicBlock *
-LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header. This ensures that loops
+/// have exactly one backedge.
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+ AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
@@ -762,7 +464,349 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
return BEBlock;
}
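On a small hand-written example (assuming the ".backedge" suffix this function
gives the new block; everything else is illustrative), the change looks like:

    ; Before: %loop has two backedges.
    a:
      br label %loop
    b:
      br label %loop

    ; After: both latches are revectored through one new block, which
    ; becomes the loop's single canonical backedge.
    a:
      br label %loop.backedge
    b:
      br label %loop.backedge
    loop.backedge:
      br label %loop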
-void LoopSimplify::verifyAnalysis() const {
+/// \brief Simplify one loop and queue further loops for simplification.
+///
+/// FIXME: Currently this accepts both a number of analyses that it uses and a
+/// raw Pass pointer. The Pass pointer is used by numerous utilities to update
+/// specific analyses. Rather than a Pass, it would be much cleaner and more
+/// explicit if they accepted the analyses directly and then updated them.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+ AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, Pass *PP) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
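For instance (illustrative IR), an unreachable block with an edge into the
loop body is disconnected by rewriting its terminator:

    ; Before: %dead has no predecessors yet still branches into the loop.
    dead:
      br label %body

    ; After: %body's PHIs drop the incoming entry and the edge is gone.
    dead:
      unreachable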
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
+ Changed = true;
+ }
+ }
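A minimal sketch: when successor 0 stays in the loop, the computed constant is
false, steering the branch out of the loop:

    ; Before:
    exiting:
      br i1 undef, label %body, label %exit
    ; After (%body is in the loop, %exit is not):
    exiting:
      br i1 false, label %body, label %exit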
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L, PP);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
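Illustratively, a header reachable from two outside blocks gains a dedicated
preheader (the ".preheader" suffix is the one InsertPreheaderForLoop uses;
other names are made up):

    ; Before: %header has two out-of-loop predecessors.
    entry:
      br i1 %c, label %header, label %other
    other:
      br label %header

    ; After: both outside edges funnel through one new block.
    entry:
      br i1 %c, label %header.preheader, label %other
    other:
      br label %header.preheader
    header.preheader:
      br label %header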
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
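A sketch of that rewrite, assuming the ".loopexit" suffix used for the split
block (other names illustrative):

    ; Before: %exit is reached both from inside the loop and from %outside.
    exiting:                             ; inside the loop
      br i1 %c, label %body, label %exit
    outside:
      br label %exit

    ; After: the in-loop edge gets a dedicated exit block that only the
    ; loop can reach, so the loop header dominates it.
    exiting:
      br i1 %c, label %body, label %exit.loopexit
    exit.loopexit:
      br label %exit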
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) {
+ ++NumNested;
+ // Enqueue the outer loop as it should be processed next in our
+ // depth-first nest walk.
+ Worklist.push_back(OuterL);
+
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+      // GCC doesn't tail-recursion-eliminate this call.
+ // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
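For example (illustrative), with just a preheader and a single latch feeding
the header:

    %x = phi i32 [ %y, %preheader ], [ %x, %latch ]
    ; Every evaluation of %x yields %y, so SimplifyInstruction returns %y
    ; and the PHI is erased after replaceAllUsesWith.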
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+  // SimplifyCFG also does this (and this code uses the same utility
+  // function); however, this code is loop-aware while SimplifyCFG is
+  // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ bool AnyInvariant = false;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (AnyInvariant) {
+ Changed = true;
+ // The loop disposition of all SCEV expressions that depend on any
+ // hoisted values have also changed.
+ if (SE)
+ SE->forgetLoopDispositions(L);
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+      // Notify ScalarEvolution before deleting this block. Currently we assume
+      // the parent loop doesn't change (splitting edges doesn't count). If
+      // blocks, CFG edges, or other values in the parent loop change, then we
+      // need to call forgetLoop() for the parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
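Schematically (a rough sketch; the combined condition and its name are
whatever FoldBranchToCommonDest produces), an exiting block reduced to a
loop-invariant compare plus branch is merged into its lone predecessor:

    ; Before: %check and %pred share the successor %exit.
    pred:
      br i1 %c1, label %check, label %exit
    check:
      %c2 = icmp slt i32 %inv, 10        ; loop-invariant
      br i1 %c2, label %body, label %exit

    ; After: the branches are folded and %check is deleted.
    pred:
      %c2 = icmp slt i32 %inv, 10
      %or.cond = and i1 %c1, %c2
      br i1 %or.cond, label %body, label %exit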
+
+ return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
+ AliasAnalysis *AA, ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Worklist maintains our depth-first queue of loops in this nest to process.
+ SmallVector<Loop *, 4> Worklist;
+ Worklist.push_back(L);
+
+  // Walk the worklist from front to back, pushing newly found subloops onto
+ // the back. This will let us process loops from back to front in depth-first
+ // order. We can use this simple process because loops form a tree.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ Loop *L2 = Worklist[Idx];
+ for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
+ Worklist.push_back(*I);
+ }
+
+  while (!Worklist.empty())
+    Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
+                               SE, PP);
+
+ return Changed;
+}
+
+namespace {
+ struct LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DependenceAnalysis>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const override;
+
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could
+/// do it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+ bool Changed = false;
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ // Simplify each loop nest in the function.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE);
+
+ return Changed;
+}
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+ // Verify subloops.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ verifyLoop(*I);
+
// It used to be possible to just assert L->isLoopSimplifyForm(), however
// with the introduction of indirectbr, there are now cases where it's
// not possible to transform a loop as necessary. We can at least check
@@ -799,3 +843,15 @@ void LoopSimplify::verifyAnalysis() const {
(void)HasIndBrExiting;
}
}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+ // FIXME: This routine is being called mid-way through the loop pass manager
+ // as loop passes destroy this analysis. That's actually fine, but we have no
+ // way of expressing that here. Once all of the passes that destroy this are
+ // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(*I);
+#endif
+}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 162807d..d2dfc20 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,11 +24,13 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -137,10 +139,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
-/// available it must also preserve those analyses.
+/// available from the Pass, it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool AllowRuntime, unsigned TripMultiple,
- LoopInfo *LI, LPPassManager *LPM) {
+ LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -208,8 +210,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- if (LPM) {
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (PP) {
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
if (SE)
SE->forgetLoop(L);
}
@@ -409,14 +411,18 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
}
- if (LPM) {
+ DominatorTree *DT = 0;
+ if (PP) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
// Incrementally updating domtree after loop unrolling would be easy.
- if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
- DT->runOnFunction(*L->getHeader()->getParent());
+ if (DominatorTreeWrapperPass *DTWP =
+ PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DT = &DTWP->getDomTree();
+ DT->recalculate(*L->getHeader()->getParent());
+ }
// Simplify any new induction variables in the partially unrolled loop.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
if (SE && !CompletelyUnroll) {
SmallVector<WeakVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, LPM, DeadInsts);
@@ -449,9 +455,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
+
+ Loop *OuterL = L->getParentLoop();
// Remove the loop from the LoopPassManager if it's completely removed.
if (CompletelyUnroll && LPM != NULL)
LPM->deleteLoopFromQueue(L);
+ // If we have a pass and a DominatorTree we should re-simplify impacted loops
+ // to ensure subsequent analyses can rely on this form. We want to simplify
+ // at least one layer outside of the loop that was unrolled so that any
+ // changes to the parent loop exposed by the unrolling are considered.
+ if (PP && DT) {
+ if (!OuterL && !CompletelyUnroll)
+ OuterL = L;
+ if (OuterL) {
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE);
+ formLCSSARecursively(*OuterL, *DT, SE);
+ }
+ }
+
return true;
}
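The LCSSA reconstruction matters because unrolling can change which values are
live out of the loop nest; LCSSA form pins every such value behind a
single-operand PHI in the exit block, e.g. (illustrative):

    exit:
      %v.lcssa = phi i32 [ %v, %loop ]
      ; users outside the loop now go through %v.lcssa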
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index e017f50..3e61289 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -52,7 +52,7 @@ namespace {
initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
@@ -94,15 +94,25 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
return false;
// Handle non-optimized IR code like:
- // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
// %tobool = icmp ne i64 %expval, 0
// br i1 %tobool, label %if.then, label %if.end
+ //
+ // Or the following simpler case:
+ // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)
+ // br i1 %expval, label %if.then, label %if.end
+
+ CallInst *CI;
ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
- if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
- return false;
+ if (!CmpI) {
+ CI = dyn_cast<CallInst>(BI->getCondition());
+ } else {
+ if (CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+ CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ }
- CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
if (!CI)
return false;
@@ -127,7 +137,10 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
BI->setMetadata(LLVMContext::MD_prof, Node);
- CmpI->setOperand(0, ArgValue);
+ if (CmpI)
+ CmpI->setOperand(0, ArgValue);
+ else
+ BI->setCondition(ArgValue);
return true;
}
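Either way the lowered IR has the same shape: the expect call vanishes and the
branch carries profile metadata. A sketch, using what are assumed here to be
the pass's default 64/4 likely/unlikely weights:

    br i1 %cmp, label %if.then, label %if.end, !prof !0
    ...
    !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}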
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 9799a30..b1f758e 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -1,4 +1,4 @@
-//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,29 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which do not yet
-// support stack unwinding. This pass supports two models of exception handling
-// lowering, the 'cheap' support and the 'expensive' support.
-//
-// 'Cheap' exception handling support gives the program the ability to execute
-// any program which does not "throw an exception", by turning 'invoke'
-// instructions into calls and by turning 'unwind' instructions into calls to
-// abort(). If the program does dynamically use the unwind instruction, the
-// program will print a message then abort.
-//
-// 'Expensive' exception handling support gives the full exception handling
-// support to the program at the cost of making the 'invoke' instruction
-// really expensive. It basically inserts setjmp/longjmp calls to emulate the
-// exception handling as necessary.
-//
-// Because the 'expensive' support slows down programs a lot, and EH is only
-// used for a subset of the programs, it must be specifically enabled by an
-// option.
-//
-// Note that after this pass runs the CFG is not entirely accurate (exceptional
-// control flow edges are not correct anymore) so only very simple things should
-// be done after the lowerinvoke pass has run (like generation of native code).
-// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
-// support the invoke instruction yet" lowering pass.
+// support stack unwinding. This pass converts 'invoke' instructions to 'call'
+// instructions, so that any exception-handling 'landingpad' blocks become dead
+// code (which can be removed by running the '-simplifycfg' pass afterwards).
//
//===----------------------------------------------------------------------===//
@@ -38,64 +18,23 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <csetjmp>
-#include <set>
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumSpilled, "Number of registers live across unwind edges");
-
-static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
- cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
namespace {
class LowerInvoke : public FunctionPass {
- const TargetMachine *TM;
-
- // Used for both models.
- Constant *AbortFn;
-
- // Used for expensive EH support.
- StructType *JBLinkTy;
- GlobalVariable *JBListHead;
- Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
- bool useExpensiveEHSupport;
-
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetMachine *TM = 0,
- bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(ID), TM(TM),
- useExpensiveEHSupport(useExpensiveEHSupport) {
+ explicit LowerInvoke() : FunctionPass(ID) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Module &M);
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // This is a cluster of orthogonal Transforms
- AU.addPreserved("mem2reg");
- AU.addPreservedID(LowerSwitchID);
- }
-
- private:
- bool insertCheapEHSupport(Function &F);
- void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
- void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
- AllocaInst *InvokeNum, AllocaInst *StackPtr,
- SwitchInst *CatchSwitch);
- bool insertExpensiveEHSupport(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
@@ -107,65 +46,11 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
- bool useExpensiveEHSupport) {
- return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvoke();
}
-// doInitialization - Make sure that there is a prototype for abort in the
-// current module.
-bool LowerInvoke::doInitialization(Module &M) {
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
- if (useExpensiveEHSupport) {
- // Insert a type for the linked list of jump buffers.
- const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
- unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
- JBSize = JBSize ? JBSize : 200;
- Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
- JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
- Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
- JBLinkTy->setBody(Elts);
-
- Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
-
- // Now that we've done that, insert the jmpbuf list head global, unless it
- // already exists.
- if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
- JBListHead = new GlobalVariable(M, PtrJBList, false,
- GlobalValue::LinkOnceAnyLinkage,
- Constant::getNullValue(PtrJBList),
- "llvm.sjljeh.jblist");
- }
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
- StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
- StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
- }
-
- // We need the 'write' and 'abort' functions for both models.
- AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
- (Type *)0);
- return true;
-}
-
-bool LowerInvoke::insertCheapEHSupport(Function &F) {
+bool LowerInvoke::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
@@ -192,388 +77,3 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
}
return Changed;
}
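Per invoke, the rewrite amounts to the following (illustrative)
transformation:

    ; Before:
    invoke void @f() to label %cont unwind label %lpad
    ; After -lowerinvoke:
    call void @f()
    br label %cont
    ; %lpad and its landingpad are now unreachable dead code.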
-
-/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
-/// specified invoke instruction with a call.
-void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
- AllocaInst *InvokeNum,
- AllocaInst *StackPtr,
- SwitchInst *CatchSwitch) {
- ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
-
- // If the unwind edge has phi nodes, split the edge.
- if (isa<PHINode>(II->getUnwindDest()->begin())) {
- SplitCriticalEdge(II, 1, this);
-
- // If there are any phi nodes left, they must have a single predecessor.
- while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- PN->eraseFromParent();
- }
- }
-
- // Insert a store of the invoke num before the invoke and store zero into the
- // location afterward.
- new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
-
- // Insert a store of the stack ptr before the invoke, so we can restore it
- // later in the exception case.
- CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
- new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
-
- BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
- // nonvolatile.
- new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
- InvokeNum, false, NI);
-
- Instruction* StackPtrLoad =
- new LoadInst(StackPtr, "stackptr.restore", true,
- II->getUnwindDest()->getFirstInsertionPt());
- CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
-
- // Add a switch case to our unwind block.
- CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
-
- // Insert a normal call instruction.
- SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Replace the invoke with an uncond branch.
- BranchInst::Create(II->getNormalDest(), NewCall->getParent());
- II->eraseFromParent();
-}
-
-/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
-/// we reach blocks we've already seen.
-static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
- if (!LiveBBs.insert(BB).second) return; // already been here.
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- MarkBlocksLiveIn(*PI, LiveBBs);
-}
-
-// First thing we need to do is scan the whole function for values that are
-// live across unwind edges. Each value that is live across an unwind edge
-// we spill into a stack location, guaranteeing that there is nothing live
-// across the unwind edge. This process also splits all critical edges
-// coming out of invoke's.
-void LowerInvoke::
-splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
- // First step, split all critical edges from invoke instructions.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- InvokeInst *II = Invokes[i];
- SplitCriticalEdge(II, 0, this);
- SplitCriticalEdge(II, 1, this);
- assert(!isa<PHINode>(II->getNormalDest()) &&
- !isa<PHINode>(II->getUnwindDest()) &&
- "critical edge splitting left single entry phi nodes?");
- }
-
- Function *F = Invokes.back()->getParent()->getParent();
-
- // To avoid having to handle incoming arguments specially, we lower each arg
- // to a copy instruction in the entry block. This ensures that the argument
- // value itself cannot be live across the entry block.
- BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
- while (isa<AllocaInst>(AfterAllocaInsertPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
- ++AfterAllocaInsertPt;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However,
- // we're replacing it here with the same value it was constructed with.
- // We do this because the above replaceAllUsesWith() clobbered the
- // operand, but we want this one to remain.
- NC->setOperand(0, AI);
- }
- }
-
- // Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- // Ignore obvious cases we don't have to handle. In particular, most
- // instructions either have no uses or only have a single use inside the
- // current block. Ignore them quickly.
- Instruction *Inst = II;
- if (Inst->use_empty()) continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
-
- // If this is an alloca in the entry block, it's not a real register
- // value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
- continue;
-
- // Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*,16> Users;
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != BB || isa<PHINode>(User))
- Users.push_back(User);
- }
-
- // Scan all of the uses and see if the live range is live across an unwind
- // edge. If we find a use live across an invoke edge, create an alloca
- // and spill the value.
-
- // Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
- LiveBBs.insert(Inst->getParent());
- while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
-
- if (!isa<PHINode>(U)) {
- MarkBlocksLiveIn(U->getParent(), LiveBBs);
- } else {
- // Uses for a PHI node occur in their predecessor block.
- PHINode *PN = cast<PHINode>(U);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
- MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
- }
- }
-
- // Now that we know all of the blocks that this thing is live in, see if
- // it includes any of the unwind locations.
- bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
- NeedsSpill = true;
- }
- }
-
- // If we decided we need a spill, do it.
- if (NeedsSpill) {
- ++NumSpilled;
- DemoteRegToStack(*Inst, true);
- }
- }
-}
-
-bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
- SmallVector<ReturnInst*,16> Returns;
- SmallVector<InvokeInst*,16> Invokes;
- UnreachableInst* UnreachablePlaceholder = 0;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- // Remember all return instructions in case we insert an invoke into this
- // function.
- Returns.push_back(RI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Invokes.push_back(II);
- }
-
- if (Invokes.empty()) return false;
-
- NumInvokes += Invokes.size();
-
- // TODO: This is not an optimal way to do this. In particular, this always
- // inserts setjmp calls into the entries of functions with invoke instructions
- // even though there are possibly paths through the function that do not
- // execute any invokes. In particular, for functions with early exits, e.g.
- // the 'addMove' method in hexxagon, it would be nice to not have to do the
- // setjmp stuff on the early exit path. This requires a bit of dataflow, but
- // would not be too hard to do.
-
- // If we have an invoke instruction, insert a setjmp that dominates all
- // invokes. After the setjmp, use a cond branch that goes to the original
- // code path on zero, and to a designated 'catch' block of nonzero.
- Value *OldJmpBufPtr = 0;
- if (!Invokes.empty()) {
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge
- // we spill into a stack location, guaranteeing that there is nothing live
- // across the unwind edge. This process also splits all critical edges
- // coming out of invoke's.
- splitLiveRangesLiveAcrossInvokes(Invokes);
-
- BasicBlock *EntryBB = F.begin();
-
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be live across invokes.
- const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
- unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
- AllocaInst *JmpBuf =
- new AllocaInst(JBLinkTy, 0, Align,
- "jblink", F.begin()->begin());
-
- Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
- OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
- EntryBB->getTerminator());
-
- // Copy the JBListHead to the alloca.
- Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
- EntryBB->getTerminator());
- new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
-
- // Add the new jumpbuf to the list.
- new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
-
- // Create the catch block. The catch block is basically a big switch
- // statement that goes to all of the invoke catch blocks.
- BasicBlock *CatchBB =
- BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
-
- // Create an alloca which keeps track of the stack pointer before every
- // invoke, this allows us to properly restore the stack pointer after
- // long jumping.
- AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
- "stackptr", EntryBB->begin());
-
- // Create an alloca which keeps track of which invoke is currently
- // executing. For normal calls it contains zero.
- AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
- "invokenum",EntryBB->begin());
- new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
- InvokeNum, true, EntryBB->getTerminator());
-
- // Insert a load in the Catch block, and a switch on its value. By default,
- // we go to a block that just does an unwind (which is the correct action
- // for a standard call). We insert an unreachable instruction here and
- // modify the block to jump to the correct unwinding pad later.
- BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
- UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
-
- Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
- SwitchInst *CatchSwitch =
- SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
-
- // Now that things are set up, insert the setjmp call itself.
-
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "setjmp.cont");
-
- Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
- Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
- EntryBB->getTerminator());
- JmpBufPtr = new BitCastInst(JmpBufPtr,
- Type::getInt8PtrTy(F.getContext()),
- "tmp", EntryBB->getTerminator());
- Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
- EntryBB->getTerminator());
-
- // Compare the return value to zero.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, SJRet,
- Constant::getNullValue(SJRet->getType()),
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
-
- // At this point, we are all set up, rewrite each invoke instruction.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
- }
-
- // We know that there is at least one unwind.
-
- // Create three new blocks, the block to load the jmpbuf ptr and compare
- // against null, the block to do the longjmp, and the error block for if it
- // is null. Add them at the end of the function because they are not hot.
- BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
- "dounwind", &F);
- BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
- BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
-
- // If this function contains an invoke, restore the old jumpbuf ptr.
- Value *BufPtr;
- if (OldJmpBufPtr) {
- // Before the return, insert a copy from the saved value to the new value.
- BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
- new StoreInst(BufPtr, JBListHead, UnwindHandler);
- } else {
- BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
- }
-
- // Load the JBList, if it's null, then there was no catch!
- Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
- Constant::getNullValue(BufPtr->getType()),
- "notnull");
- BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
-
- // Create the block to do the longjmp.
- // Get a pointer to the jmpbuf and longjmp.
- Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
- Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
- Idx[0] = new BitCastInst(Idx[0],
- Type::getInt8PtrTy(F.getContext()),
- "tmp", UnwindBlock);
- Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
- CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
- new UnreachableInst(F.getContext(), UnwindBlock);
-
- // Set up the term block ("throw without a catch").
- new UnreachableInst(F.getContext(), TermBlock);
-
- // Insert a call to abort()
- CallInst::Create(AbortFn, "",
- TermBlock->getTerminator())->setTailCall();
-
- // Replace the inserted unreachable with a branch to the unwind handler.
- if (UnreachablePlaceholder) {
- BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
- UnreachablePlaceholder->eraseFromParent();
- }
-
- // Finally, for any returns from this function, if this function contains an
- // invoke, restore the old jmpbuf pointer to its input value.
- if (OldJmpBufPtr) {
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- ReturnInst *R = Returns[i];
-
- // Before the return, insert a copy from the saved value to the new value.
- Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
- new StoreInst(OldBuf, JBListHead, true, R);
- }
- }
-
- return true;
-}
-
-bool LowerInvoke::runOnFunction(Function &F) {
- if (useExpensiveEHSupport)
- return insertExpensiveEHSupport(F);
- else
- return insertCheapEHSupport(F);
-}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 2d2a8a5..6fb7410 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -37,9 +37,9 @@ namespace {
initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreserved("mem2reg");
@@ -245,7 +245,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
// Merge case into clusters
if (Cases.size()>=2)
- for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
+ J != Cases.end();) {
int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
BasicBlock* nextBB = J->BB;
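The merge targets runs of contiguous case values sharing a destination, e.g.
(illustrative):

    switch i32 %x, label %default [ i32 1, label %bb
                                    i32 2, label %bb
                                    i32 3, label %bb ]
    ; Cases 1..3 are adjacent and share %bb, so they collapse into the
    ; single range cluster [1, 3] -> %bb before the comparison tree is built.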
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 61b3965..a188ac5 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -34,10 +34,10 @@ namespace {
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
//
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
@@ -50,7 +50,7 @@ namespace {
char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
@@ -61,7 +61,7 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
while (1) {
Allocas.clear();
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c370453..395a46b 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -48,11 +48,11 @@ namespace {
initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 8f6eee3..25fab89 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -34,18 +34,18 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
@@ -61,9 +61,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// assignments to subsections of the memory unit.
// Only allow direct and non-volatile loads and stores...
- for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) { // Loop over all of the uses of the alloca
- const User *U = *UI;
+ for (const User *U : AI->users()) {
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Note that atomic loads can be transformed; atomic semantics do
// not have any meaning for a local alloca.
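So a promotable alloca looks like the first three lines below, while an
escaping use such as the hypothetical @use call blocks promotion:

    %p = alloca i32
    store i32 0, i32* %p                 ; direct, non-volatile: allowed
    %v = load i32* %p                    ; allowed (atomic is also fine)
    call void @use(i32* %p)              ; address escapes: not promotable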
@@ -131,8 +129,7 @@ struct AllocaInfo {
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -317,8 +314,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE;) {
+ for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
Instruction *I = cast<Instruction>(*UI);
++UI;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
@@ -328,10 +324,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE;) {
- Instruction *Inst = cast<Instruction>(*UI);
- ++UI;
+ for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
+ Instruction *Inst = cast<Instruction>(*UUI);
+ ++UUI;
Inst->eraseFromParent();
}
}
@@ -359,7 +354,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *UserInst = cast<Instruction>(*UI++);
if (!isa<LoadInst>(UserInst)) {
assert(UserInst == OnlyStore && "Should only have load/stores");
@@ -456,9 +451,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
StoresByIndexTy StoresByIndex;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
- ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
// Sort the stores by their index, making it efficient to do a lookup with a
@@ -467,7 +461,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
LoadInst *LI = dyn_cast<LoadInst>(*UI++);
if (!LI)
continue;
@@ -485,7 +479,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
else
// Otherwise, there was a store before this load, the load takes its value.
- LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
+ LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
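With the stores sorted by index, each load binds to the nearest store above
it, e.g. (illustrative):

    store i32 1, i32* %p
    %a = load i32* %p                    ; forwarded from the store of 1
    store i32 2, i32* %p
    %b = load i32* %p                    ; forwarded from the store of 2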
@@ -495,7 +489,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Remove the (now dead) stores and alloca.
while (!AI->use_empty()) {
- StoreInst *SI = cast<StoreInst>(AI->use_back());
+ StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
DIBuilder DIB(*AI->getParent()->getParent()->getParent());
@@ -679,8 +673,8 @@ void PromoteMem2Reg::run() {
// Iterating over NewPhiNodes is deterministic, so it is safe to try to
// simplify and RAUW them as we go. If it was not, we could add uses to
- // the values we replace with in a non deterministic order, thus creating
- // non deterministic def->use chains.
+ // the values we replace with in a non-deterministic order, thus creating
+ // non-deterministic def->use chains.
for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
I = NewPhiNodes.begin(),
E = NewPhiNodes.end();
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 30adbfa..28f5c44 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,12 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index ff50b12..1e88587 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -23,6 +23,8 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -34,14 +36,12 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/NoFolder.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
@@ -62,12 +62,13 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
-static cl::opt<bool>
-HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
- cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+static cl::opt<bool> HoistCondStores(
+ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store precedes"));
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
@@ -90,7 +91,7 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
- const DataLayout *const TD;
+ const DataLayout *const DL;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -109,8 +110,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
- : TTI(TTI), TD(TD) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL)
+ : TTI(TTI), DL(DL) {}
bool run(BasicBlock *BB);
};
}
@@ -306,15 +307,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
+ IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -340,13 +341,13 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
+ const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), DL)) {
Value *RHSVal;
ConstantInt *RHSC;
@@ -405,11 +406,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
- if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, DL,
isEQ, UsedICmps)) {
unsigned NumVals = Vals.size();
unsigned UsedICmpsBeforeRHS = UsedICmps;
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
isEQ, UsedICmps)) {
if (LHS == RHS)
return LHS;
@@ -434,7 +435,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
if (Extra == 0 || Extra == I->getOperand(0)) {
Value *OldExtra = Extra;
Extra = I->getOperand(0);
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
isEQ, UsedICmps))
return RHS;
assert(Vals.size() == NumValsBeforeLHS);
@@ -472,14 +473,14 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV) {
+ if (DL && CV) {
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+ if (PTII->getType() == DL->getIntPtrType(Ptr->getType()))
CV = Ptr;
}
}
@@ -504,7 +505,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1),
- TD),
+ DL),
Succ));
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
}
@@ -732,8 +733,7 @@ static void GetBranchWeights(TerminatorInst *TI,
MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
- assert(CI);
+ ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i));
Weights.push_back(CI->getValue().getZExtValue());
}
@@ -748,21 +748,14 @@ static void GetBranchWeights(TerminatorInst *TI,
}
}
-/// Sees if any of the weights are too big for a uint32_t, and halves all the
-/// weights if any are.
+/// Keep halving the weights until all can fit in uint32_t.
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
- bool Halve = false;
- for (unsigned i = 0; i < Weights.size(); ++i)
- if (Weights[i] > UINT_MAX) {
- Halve = true;
- break;
- }
-
- if (! Halve)
- return;
-
- for (unsigned i = 0; i < Weights.size(); ++i)
- Weights[i] /= 2;
+ uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
+ if (Max > UINT_MAX) {
+ unsigned Offset = 32 - countLeadingZeros(Max);
+ for (uint64_t &I : Weights)
+ I >>= Offset;
+ }
}
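
For readers following the new FitWeights, the single right-shift is equivalent to halving every weight repeatedly until the largest fits in 32 bits. A standalone sketch of the same arithmetic (using GCC/Clang's __builtin_clzll as a stand-in for LLVM's countLeadingZeros; the sample weights are made up):

    #include <algorithm>
    #include <cassert>
    #include <climits>
    #include <cstdint>
    #include <vector>

    // Shift all weights right by just enough bits that the maximum
    // fits in a uint32_t.
    static void fitWeights(std::vector<uint64_t> &Weights) {
      uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
      if (Max > UINT_MAX) {
        // Example: Max = 1ULL << 40 has 23 leading zeros, so
        // Offset = 32 - 23 = 9 and Max >> 9 == 1ULL << 31, which fits.
        unsigned Offset = 32 - __builtin_clzll(Max);
        for (uint64_t &W : Weights)
          W >>= Offset;
      }
    }

    int main() {
      std::vector<uint64_t> W = {UINT64_C(1) << 40, 7};
      fitWeights(W);
      // Very small weights can scale down to zero; that is a property
      // of this kind of proportional rescaling.
      assert(W[0] == (UINT64_C(1) << 31) && W[1] == 0);
    }
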
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
@@ -929,8 +922,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
+ assert(DL && "Cannot switch on pointer without DataLayout");
+ CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1421,7 +1414,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
Value *SpeculatedStoreValue = 0;
StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
- BBE = llvm::prior(ThenBB->end());
+ BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
@@ -1531,7 +1524,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
- llvm::prior(ThenBB->end()));
+ std::prev(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
@@ -1589,10 +1582,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
// We can only support instructions that do not define values that are
// live outside of the current basic block.
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- Instruction *U = cast<Instruction>(*UI);
- if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ for (User *U : BBI->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI)) return false;
}
// Looks ok, continue checking.
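
The loop above also shows this commit's pervasive iterator-to-range migration: use_begin()/use_end() pairs become a range-for over users(), which yields User* directly. A toy model of the new spelling (the User and Value types here are simplified stand-ins, not the LLVM classes):

    #include <cstdio>
    #include <vector>

    struct User { int Id; };

    struct Value {
      std::vector<User *> Users;
      // Range interface in the style of llvm::Value::users().
      const std::vector<User *> &users() const { return Users; }
    };

    int main() {
      User A{1}, B{2};
      Value V{{&A, &B}};
      for (User *U : V.users())        // new spelling of the use loop
        std::printf("user %d\n", U->Id);
    }
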
@@ -1605,7 +1597,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1674,7 +1666,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
}
// Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, TD)) {
+ if (Value *V = SimplifyInstruction(N, DL)) {
TranslateMap[BBI] = V;
delete N; // Instruction folded away, don't need actual inst
} else {
@@ -1695,7 +1687,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, TD) | true;
+ return FoldCondBranchOnPHI(BI, DL) | true;
}
return false;
@@ -1703,7 +1695,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1737,7 +1729,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, TD)) {
+ if (Value *V = SimplifyInstruction(PN, DL)) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
continue;
@@ -2015,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// register pressure or inhibit out-of-order execution.
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
- FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
@@ -2094,7 +2086,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// instructions that are used by the terminator's condition because it
// exposes more merging opportunities.
bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
- *BonusInst->use_begin() == Cond);
+ BonusInst->user_back() == Cond);
if (BonusInst && !UsedByBranch) {
// Collect the values used by the bonus inst
@@ -2153,6 +2145,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *NewBonus = 0;
if (BonusInst) {
NewBonus = BonusInst->clone();
+
+ // If we moved a load, we can no longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand.
+ NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg);
+
PredBlock->getInstList().insert(PBI, NewBonus);
NewBonus->takeName(BonusInst);
BonusInst->setName(BonusInst->getName()+".old");
@@ -2625,7 +2625,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
+ const DataLayout *DL) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2653,12 +2653,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
- if (Value *V = SimplifyInstruction(ICI, TD)) {
+ if (Value *V = SimplifyInstruction(ICI, DL)) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2674,13 +2674,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
- PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
return false;
@@ -2730,7 +2730,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
@@ -2746,10 +2746,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
unsigned UsedICmps = 0;
if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, true,
UsedICmps);
} else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, false,
UsedICmps);
TrueWhenEqual = false;
}
@@ -2811,9 +2811,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without DataLayout");
+ assert(DL && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getType()),
+ DL->getIntPtrType(CompVal->getType()),
"magicptr");
}
@@ -3222,7 +3222,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
- if (HasWeight) {
+ if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
SI->setMetadata(LLVMContext::MD_prof,
MDBuilder(SI->getParent()->getContext()).
@@ -3428,7 +3428,7 @@ GetCaseResults(SwitchInst *SI,
Res.push_back(std::make_pair(PHI, ConstVal));
}
- return true;
+ return Res.size() > 0;
}
namespace {
@@ -3444,7 +3444,7 @@ namespace {
ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
- const DataLayout *TD);
+ const DataLayout *DL);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -3452,7 +3452,7 @@ namespace {
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout *TD,
+ static bool WouldFitInRegister(const DataLayout *DL,
uint64_t TableSize,
const Type *ElementType);
@@ -3491,7 +3491,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
- const DataLayout *TD)
+ const DataLayout *DL)
: SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3499,12 +3499,14 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
// If all values in the table are equal, this is that value.
SingleValue = Values.begin()->second;
+ Type *ValueType = Values.begin()->second->getType();
+
// Build up the table contents.
SmallVector<Constant*, 64> TableContents(TableSize);
for (size_t I = 0, E = Values.size(); I != E; ++I) {
ConstantInt *CaseVal = Values[I].first;
Constant *CaseRes = Values[I].second;
- assert(CaseRes->getType() == DefaultValue->getType());
+ assert(CaseRes->getType() == ValueType);
uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
.getLimitedValue();
@@ -3516,6 +3518,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
// Fill in any holes in the table with the default result.
if (Values.size() < TableSize) {
+ assert(DefaultValue && "Need a default value to fill the lookup table holes.");
+ assert(DefaultValue->getType() == ValueType);
for (uint64_t I = 0; I < TableSize; ++I) {
if (!TableContents[I])
TableContents[I] = DefaultValue;
@@ -3533,8 +3537,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
// If the type is integer and the table fits in a register, build a bitmap.
- if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
- IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ if (WouldFitInRegister(DL, TableSize, ValueType)) {
+ IntegerType *IT = cast<IntegerType>(ValueType);
APInt TableInt(TableSize * IT->getBitWidth(), 0);
for (uint64_t I = TableSize; I > 0; --I) {
TableInt <<= IT->getBitWidth();
@@ -3552,7 +3556,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
// Store the table in an array.
- ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
@@ -3598,10 +3602,10 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
llvm_unreachable("Unknown lookup table kind!");
}
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
uint64_t TableSize,
const Type *ElementType) {
- if (!TD)
+ if (!DL)
return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
@@ -3612,7 +3616,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
if (TableSize >= UINT_MAX/IT->getBitWidth())
return false;
- return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+ return DL->fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
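
The UINT_MAX/getBitWidth() guard above exists because fitsInLegalInteger takes an unsigned bit count, so the TableSize * BitWidth product must not wrap. A small self-contained check of the same arithmetic (names hypothetical):

    #include <cassert>
    #include <climits>
    #include <cstdint>

    // Reject table sizes whose bit count would wrap an unsigned.
    static bool wouldOverflow(uint64_t TableSize, unsigned BitWidth) {
      return TableSize >= UINT_MAX / BitWidth;
    }

    int main() {
      assert(!wouldOverflow(100, 32));              // 3200 bits: safe
      assert(wouldOverflow(UINT64_C(1) << 32, 32)); // product would wrap
    }
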
@@ -3621,7 +3625,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
const TargetTransformInfo &TTI,
- const DataLayout *TD,
+ const DataLayout *DL,
const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
@@ -3637,7 +3641,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
// Saturate this flag to false.
AllTablesFitInRegister = AllTablesFitInRegister &&
- SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+ SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
// If both flags saturate, we're done. NOTE: This *only* works with
// saturating flags, and all flags have to saturate first due to the
@@ -3666,7 +3670,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder,
const TargetTransformInfo &TTI,
- const DataLayout* TD) {
+ const DataLayout* DL) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
@@ -3680,11 +3684,9 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
- // Ignore the switch if the number of cases is too small.
- // This is similar to the check when building jump tables in
- // SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Determine the best cut-off.
- if (SI->getNumCases() < 4)
+ // Ignore switches with fewer than three cases. Lookup tables will not make
+ // them faster, so we don't analyze them.
+ if (SI->getNumCases() < 3)
return false;
// Figure out the corresponding result for each case value and phi node in the
@@ -3712,7 +3714,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results, TD))
+ Results, DL))
return false;
// Append the result from this case to the list for each phi.
@@ -3723,21 +3725,41 @@ static bool SwitchToLookupTable(SwitchInst *SI,
}
}
- // Get the resulting values for the default case.
+ // Keep track of the result types.
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+ ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
+ }
+
+ uint64_t NumResults = ResultLists[PHIs[0]].size();
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ bool TableHasHoles = (NumResults < TableSize);
+
+ // If the table has holes, we need a constant result for the default case
+ // or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
- if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, TD))
- return false;
+ bool HasDefaultResults = false;
+ if (TableHasHoles) {
+ HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, DL);
+ }
+ bool NeedMask = (TableHasHoles && !HasDefaultResults);
+ if (NeedMask) {
+ // As an extra penalty for the validity test, we require more cases.
+ if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+ return false;
+ if (!(DL && DL->fitsInLegalInteger(TableSize)))
+ return false;
+ }
+
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
Constant *Result = DefaultResultsList[I].second;
DefaultResults[PHI] = Result;
- ResultTypes[PHI] = Result->getType();
}
- APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
// Create the BB that does the lookups.
@@ -3755,7 +3777,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Compute the maximum table size representable by the integer type we are
// switching upon.
unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
- uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize;
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
assert(MaxTableSize >= TableSize &&
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
@@ -3776,19 +3798,61 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
+
+ if (NeedMask) {
+ // Before doing the lookup we do the hole check.
+ // The LookupBB is therefore re-purposed to do the hole check
+ // and we create a new LookupBB.
+ BasicBlock *MaskBB = LookupBB;
+ MaskBB->setName("switch.hole_check");
+ LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Build bitmask; fill in a 1 bit for every case.
+ APInt MaskInt(TableSize, 0);
+ APInt One(TableSize, 1);
+ const ResultListTy &ResultList = ResultLists[PHIs[0]];
+ for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
+ uint64_t Idx = (ResultList[I].first->getValue() -
+ MinCaseVal->getValue()).getLimitedValue();
+ MaskInt |= One << Idx;
+ }
+ ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
+
+ // Get the TableIndex'th bit of the bitmask.
+ // If this bit is 0 (meaning hole) jump to the default destination,
+ // else continue with table lookup.
+ IntegerType *MapTy = TableMask->getType();
+ Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy,
+ "switch.maskindex");
+ Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex,
+ "switch.shifted");
+ Value *LoBit = Builder.CreateTrunc(Shifted,
+ Type::getInt1Ty(Mod.getContext()),
+ "switch.lobit");
+ Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+
+ Builder.SetInsertPoint(LookupBB);
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
+ }
+
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
+ // If using a bitmask, use any value to fill the lookup table holes.
+ Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
- DefaultResults[PHI], TD);
+ DV, DL);
Value *Result = Table.BuildLookup(TableIndex, Builder);
// If the result is used to return immediately from the function, we want to
// do that right here.
- if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
- *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
+ PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
Builder.CreateRet(Result);
ReturnedEarly = true;
break;
@@ -3811,6 +3875,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
SI->eraseFromParent();
++NumLookupTables;
+ if (NeedMask)
+ ++NumLookupTablesHoles;
return true;
}
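
The hole-check mask added above is easiest to see with a tiny example. A sketch for a hypothetical switch covering cases {0, 2, 3} with TableSize = 4, using plain integers in place of APInt:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Cases 0, 2 and 3 are covered; index 1 is a hole.
      uint64_t Mask = 0;
      for (unsigned Idx : {0u, 2u, 3u})
        Mask |= UINT64_C(1) << Idx;        // Mask == 0b1101

      for (uint64_t Index = 0; Index < 4; ++Index) {
        bool LoBit = (Mask >> Index) & 1;  // the "switch.lobit" test above
        // LoBit == 0 means a hole: control goes to the default
        // destination instead of loading from the lookup table.
        assert(LoBit == (Index != 1));
      }
    }
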
@@ -3822,12 +3888,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -3837,22 +3903,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// Remove unreachable cases.
if (EliminateDeadSwitchCases(SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
- if (SwitchToLookupTable(SI, Builder, TTI, TD))
- return SimplifyCFG(BB, TTI, TD) | true;
+ if (SwitchToLookupTable(SI, Builder, TTI, DL))
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -3889,7 +3955,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
return Changed;
}
@@ -3913,7 +3979,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL))
return true;
}
@@ -3922,7 +3988,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -3937,7 +4003,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -3947,26 +4013,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
}
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+ if (SimplifyBranchOnICmpChain(BI, DL, Builder))
return true;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -3975,7 +4041,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
if (HoistThenElseCodeToIf(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to successor #1.
@@ -3983,7 +4049,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -3992,22 +4058,22 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, TD))
- return SimplifyCFG(BB, TTI, TD) | true;
+ if (FoldCondBranchOnPHI(BI, DL))
+ return SimplifyCFG(BB, TTI, DL) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -4023,7 +4089,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
if (C->isNullValue()) {
// Only look at the first use, avoid hurting compile time with long uselists
- User *Use = *I->use_begin();
+ User *Use = *I->user_begin();
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
@@ -4119,7 +4185,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TD);
+ Changed |= FoldTwoEntryPHINode(PN, DL);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4151,6 +4217,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
- return SimplifyCFGOpt(TTI, TD).run(BB);
+ const DataLayout *DL) {
+ return SimplifyCFGOpt(TTI, DL).run(BB);
}
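
A recurring shape in this file is `return SimplifyCFG(BB, TTI, DL) | true;`: re-run the cleanup on the just-rewritten block for its side effects and report a change no matter what the recursive call returns. A toy model of the idiom (names hypothetical):

    #include <cstdio>

    // Each successful rewrite re-runs simplification on the result and
    // reports "changed" regardless of what the recursion finds.
    static bool simplify(int Work) {
      if (Work == 0)
        return false;                   // nothing left to do
      std::printf("rewrote step %d\n", Work);
      return simplify(Work - 1) | true; // recurse; report a change
    }

    int main() { return simplify(3) ? 0 : 1; }
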
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index bf3442a..30f56be 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/IVUsers.h"
@@ -23,7 +24,10 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,7 +48,7 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *TD; // May be NULL
+ const DataLayout *DL; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
@@ -56,9 +60,10 @@ namespace {
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
- TD(LPM->getAnalysisIfAvailable<DataLayout>()),
DeadInsts(Dead),
Changed(false) {
+ DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
assert(LI && "IV simplification requires LoopInfo");
}
@@ -75,6 +80,9 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+
+ Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT);
};
}
@@ -263,6 +271,69 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
+/// \brief Split sadd.with.overflow into a plain add for the value plus the
+/// original sadd.with.overflow for the overflow check, to allow analysis
+/// and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+ if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+ return IVUser;
+
+ // Find a branch guarded by the overflow check.
+ BranchInst *Branch = 0;
+ Instruction *AddVal = 0;
+ for (User *U : II->users()) {
+ if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) {
+ if (ExtractInst->getNumIndices() != 1)
+ continue;
+ if (ExtractInst->getIndices()[0] == 0)
+ AddVal = ExtractInst;
+ else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+ Branch = dyn_cast<BranchInst>(ExtractInst->user_back());
+ }
+ }
+ if (!AddVal || !Branch)
+ return IVUser;
+
+ BasicBlock *ContinueBB = Branch->getSuccessor(1);
+ if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+ return IVUser;
+
+ // Check if all users of the add are provably NSW.
+ bool AllNSW = true;
+ for (Use &U : AddVal->uses()) {
+ if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) {
+ BasicBlock *UseBB = UseInst->getParent();
+ if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+ UseBB = PHI->getIncomingBlock(U);
+ if (!DT->dominates(ContinueBB, UseBB)) {
+ AllNSW = false;
+ break;
+ }
+ }
+ }
+ if (!AllNSW)
+ return IVUser;
+
+ // Go for it...
+ IRBuilder<> Builder(IVUser);
+ Instruction *AddInst = dyn_cast<Instruction>(
+ Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+ // The caller expects the new add to have the same form as the intrinsic. The
+ // IV operand position must be the same.
+ assert((AddInst->getOpcode() == Instruction::Add &&
+ AddInst->getOperand(0) == II->getOperand(0)) &&
+ "Bad add instruction created from overflow intrinsic.");
+
+ AddVal->replaceAllUsesWith(AddInst);
+ DeadInsts.push_back(AddVal);
+ return AddInst;
+}
+
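
Conceptually, splitOverflowIntrinsic applies when the overflow bit guards every later use of the sum, so the value can be recomputed as a no-signed-wrap add that ScalarEvolution understands. A rough C++ analogue of the guarded pattern it matches (using __builtin_add_overflow to model llvm.sadd.with.overflow; this is an illustration, not the pass itself):

    #include <cstdint>

    // The overflow bit feeds a branch, and every use of the sum sits on
    // the non-overflowing path, so the add can be treated as "nsw" there.
    static bool step(int64_t IV, int64_t Step, int64_t &Out) {
      int64_t Sum;
      if (__builtin_add_overflow(IV, Step, &Sum))
        return false;   // overflow path: Sum is never used past here
      Out = Sum;        // all uses dominated by the "continue" block
      return true;
    }

    int main() {
      int64_t R;
      return step(40, 2, R) && R == 42 ? 0 : 1;
    }
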
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
@@ -270,16 +341,15 @@ static void pushIVUsers(
SmallPtrSet<Instruction*,16> &Simplified,
SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
- for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : Def->users()) {
+ Instruction *UI = cast<Instruction>(U);
// Avoid infinite or exponential worklist processing.
// Also ensure unique worklist users.
// If Def is a LoopPhi, it may not be in the Simplified set, so check for
// self edges first.
- if (User != Def && Simplified.insert(User))
- SimpleIVUsers.push_back(std::make_pair(User, Def));
+ if (UI != Def && Simplified.insert(UI))
+ SimpleIVUsers.push_back(std::make_pair(UI, Def));
}
}
@@ -334,8 +404,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
// Bypass back edges to avoid extra work.
- if (UseOper.first == CurrIV) continue;
+ if (UseInst == CurrIV) continue;
+
+ if (V && V->shouldSplitOverflowInstrinsics()) {
+ UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
+ if (!UseInst)
+ continue;
+ }
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f9687e4..bbd65f1 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -19,9 +19,9 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -38,15 +38,18 @@ namespace {
initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
- bool runOnFunction(Function &F) {
- const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ bool runOnFunction(Function &F) override {
+ const DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -63,11 +66,10 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Next->insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
I->replaceAllUsesWith(V);
++NumSimplified;
Changed = true;
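
The S1/S2 ping-pong above is a generic fixed-point worklist: process one set, collect everything invalidated into the other, swap, and repeat until empty. A minimal sketch with integers standing in for instructions:

    #include <cstdio>
    #include <set>
    #include <utility>

    int main() {
      std::set<int> S1 = {8, 12, 20}, S2, *ToSimplify = &S1, *Next = &S2;
      while (!ToSimplify->empty()) {
        for (int V : *ToSimplify)
          if (V % 2 == 0)
            Next->insert(V / 2);   // mark the result for the next round
        ToSimplify->clear();
        std::swap(ToSimplify, Next);
      }
      std::puts("fixed point reached");
    }
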
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 15b3e66..b5bc391 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -43,7 +43,7 @@ namespace {
class LibCallOptimization {
protected:
Function *Caller;
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
LLVMContext* Context;
@@ -63,11 +63,11 @@ public:
/// change the calling convention.
virtual bool ignoreCallingConv() { return false; }
- Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ Value *optimizeCall(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
- this->TD = TD;
+ this->DL = DL;
this->TLI = TLI;
this->LCS = LCS;
if (CI->getCalledFunction())
@@ -88,9 +88,8 @@ public:
/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality())
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
@@ -104,9 +103,8 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
/// comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality() && IC->getOperand(1) == With)
continue;
// Unknown instruction.
@@ -152,7 +150,8 @@ protected:
struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
CallInst *CI;
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const override {
if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
return true;
if (ConstantInt *SizeCI =
@@ -175,7 +174,8 @@ struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
};
struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -184,8 +184,8 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -198,7 +198,8 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -207,8 +208,8 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -221,7 +222,8 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
};
struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -230,8 +232,8 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -245,7 +247,8 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
};
struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -256,7 +259,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -269,7 +272,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
return Ret;
} else {
// Maybe we can still fold __strcpy_chk to __memcpy_chk.
@@ -277,12 +280,12 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
if (Len == 0) return 0;
// This optimization requires DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Value *Ret =
EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, DL, TLI);
return Ret;
}
return 0;
@@ -290,7 +293,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -301,12 +305,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
}
@@ -316,7 +320,7 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
return Ret;
} else {
// Maybe we can still fold __stpcpy_chk to __memcpy_chk.
@@ -324,14 +328,14 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
if (Len == 0) return 0;
// This optimization requires DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(PT),
+ ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
- if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+ if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
return 0;
return DstEnd;
}
@@ -340,7 +344,8 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -351,12 +356,12 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, TLI,
+ CI->getArgOperand(2), B, DL, TLI,
Name.substr(2, 7));
return Ret;
}
@@ -369,7 +374,8 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
//===----------------------------------------------------------------------===//
struct StrCatOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcat" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -392,7 +398,7 @@ struct StrCatOpt : public LibCallOptimization {
return Dst;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -401,7 +407,7 @@ struct StrCatOpt : public LibCallOptimization {
IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
if (!DstLen)
return 0;
@@ -413,13 +419,14 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len + 1), 1);
return Dst;
}
};
struct StrNCatOpt : public StrCatOpt {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strncat" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
@@ -451,7 +458,7 @@ struct StrNCatOpt : public StrCatOpt {
if (SrcLen == 0 || Len == 0) return Dst;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// We don't optimize this case
if (Len < SrcLen) return 0;
@@ -463,7 +470,8 @@ struct StrNCatOpt : public StrCatOpt {
};
struct StrChrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strchr" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -479,22 +487,25 @@ struct StrChrOpt : public LibCallOptimization {
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (CharC == 0) {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str))
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return 0;
+ }
// Compute the offset, make sure to handle the case when we're searching for
// zero (a weird way to spell strlen).
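
The new strchr(p, 0) fold added above rests on the C guarantee that strchr's search includes the terminating NUL, so searching for 0 returns a pointer to the terminator, i.e. p + strlen(p). A quick check of the identity:

    #include <cassert>
    #include <cstring>

    int main() {
      const char *P = "lookup";
      // Both expressions point at the terminating NUL byte.
      assert(std::strchr(P, 0) == P + std::strlen(P));
    }
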
@@ -509,7 +520,8 @@ struct StrChrOpt : public LibCallOptimization {
};
struct StrRChrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strrchr" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -528,8 +540,8 @@ struct StrRChrOpt : public LibCallOptimization {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ if (DL && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, DL, TLI);
return 0;
}
@@ -545,7 +557,8 @@ struct StrRChrOpt : public LibCallOptimization {
};
struct StrCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcmp" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -578,11 +591,11 @@ struct StrCmpOpt : public LibCallOptimization {
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, DL, TLI);
}
return 0;
@@ -590,7 +603,8 @@ struct StrCmpOpt : public LibCallOptimization {
};
struct StrNCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strncmp" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
@@ -614,8 +628,8 @@ struct StrNCmpOpt : public LibCallOptimization {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+ if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -640,7 +654,8 @@ struct StrNCmpOpt : public LibCallOptimization {
};
struct StrCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -654,7 +669,7 @@ struct StrCpyOpt : public LibCallOptimization {
return Src;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -663,13 +678,14 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len), 1);
return Dst;
}
};
struct StpCpyOpt: public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -679,11 +695,11 @@ struct StpCpyOpt: public LibCallOptimization {
return 0;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
}
@@ -692,9 +708,9 @@ struct StpCpyOpt: public LibCallOptimization {
if (Len == 0) return 0;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(PT),
+ ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
// We have enough information to now generate the memcpy call to do the
@@ -705,7 +721,8 @@ struct StpCpyOpt: public LibCallOptimization {
};
struct StrNCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -737,7 +754,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// Let strncpy handle the zero padding
if (Len > SrcLen+1) return 0;
@@ -745,15 +762,16 @@ struct StrNCpyOpt : public LibCallOptimization {
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
+ ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
return Dst;
}
};
struct StrLenOpt : public LibCallOptimization {
- virtual bool ignoreCallingConv() { return true; }
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ bool ignoreCallingConv() override { return true; }
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -775,7 +793,8 @@ struct StrLenOpt : public LibCallOptimization {
};
struct StrPBrkOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -802,15 +821,16 @@ struct StrPBrkOpt : public LibCallOptimization {
}
// strpbrk(s, "a") -> strchr(s, 'a')
- if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
+ if (DL && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
return 0;
}
};
struct StrToOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
@@ -829,7 +849,8 @@ struct StrToOpt : public LibCallOptimization {
};
struct StrSpnOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -858,7 +879,8 @@ struct StrSpnOpt : public LibCallOptimization {
};
struct StrCSpnOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -882,15 +904,16 @@ struct StrCSpnOpt : public LibCallOptimization {
}
// strcspn(s, "") -> strlen(s)
- if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
+ if (DL && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
return 0;
}
};
struct StrStrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getParamType(0)->isPointerTy() ||
@@ -903,16 +926,15 @@ struct StrStrOpt : public LibCallOptimization {
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD, TLI);
+ StrLen, B, DL, TLI);
if (!StrNCmp)
return 0;
- for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
- UI != UE; ) {
+ for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
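+      // The use is consumed with a post-increment so the iterator stays
+      // valid while the old icmp is rewritten.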
ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()),
@@ -946,7 +968,7 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
}
return 0;
@@ -954,7 +976,8 @@ struct StrStrOpt : public LibCallOptimization {
};
struct MemCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -1006,15 +1029,16 @@ struct MemCmpOpt : public LibCallOptimization {
};
struct MemCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != DL->getIntPtrType(*Context))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -1025,15 +1049,16 @@ struct MemCpyOpt : public LibCallOptimization {
};
struct MemMoveOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != DL->getIntPtrType(*Context))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -1044,15 +1069,16 @@ struct MemMoveOpt : public LibCallOptimization {
};
struct MemSetOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1072,7 +1098,8 @@ struct MemSetOpt : public LibCallOptimization {
struct UnaryDoubleFPOpt : public LibCallOptimization {
bool CheckRetType;
UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
@@ -1080,9 +1107,8 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
if (CheckRetType) {
// Check that all uses of a call like 'sin' are converted back to float.
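// (e.g. if every use of 'sin(x)' is an fptrunc back to float, the call
// can be shrunk to the float variant 'sinf'.)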
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
- ++UseI) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
if (Cast == 0 || !Cast->getType()->isFloatTy())
return 0;
}
@@ -1100,6 +1126,49 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
}
};
+// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax'
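+// For example, fmin((double)f1, (double)f2) whose result is only ever
+// truncated back to float can be rewritten as (double)fminf(f1, f2).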
+struct BinaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ if (CheckRetType) {
+      // Check that all uses of a call like 'fmin'/'fmax' are converted back
+      // to float.
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'fmin((double)floatval1, (double)floatval2)',
+ // we convert it to fminf.
+ FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
+ if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
+ Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // fmin((double)floatval1, (double)floatval2)
+ // -> (double)fmin(floatval1, floatval2)
+    Value *V1 = Cast1->getOperand(0);
+    Value *V2 = Cast2->getOperand(0);
+    Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+                                     Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
+
struct UnsafeFPLibCallOptimization : public LibCallOptimization {
bool UnsafeFPShrink;
UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
@@ -1109,7 +1178,8 @@ struct UnsafeFPLibCallOptimization : public LibCallOptimization {
struct CosOpt : public UnsafeFPLibCallOptimization {
CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "cos" &&
TLI->has(LibFunc::cosf)) {
@@ -1136,7 +1206,8 @@ struct CosOpt : public UnsafeFPLibCallOptimization {
struct PowOpt : public UnsafeFPLibCallOptimization {
PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "pow" &&
TLI->has(LibFunc::powf)) {
@@ -1162,6 +1233,12 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
LibFunc::exp2l))
return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ // pow(10.0, x) -> exp10(x)
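+      // (Guarded by a TLI query, since exp10 is not available on all
+      // targets.)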
+ if (Op1C->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
+ LibFunc::exp10l))
+ return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ Callee->getAttributes());
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
@@ -1204,7 +1281,8 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
struct Exp2Opt : public UnsafeFPLibCallOptimization {
Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
TLI->has(LibFunc::exp2f)) {
@@ -1222,37 +1300,37 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- Value *LdExpArg = 0;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
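+  // Select the ldexp variant that matches the operand's FP type, and only
+  // transform when the target library actually provides it.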
+ LibFunc::Func LdExp = LibFunc::ldexpl;
+ if (Op->getType()->isFloatTy())
+ LdExp = LibFunc::ldexpf;
+ else if (Op->getType()->isDoubleTy())
+ LdExp = LibFunc::ldexp;
+
+ if (TLI->has(LdExp)) {
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
- if (LdExpArg) {
- const char *Name;
- if (Op->getType()->isFloatTy())
- Name = "ldexpf";
- else if (Op->getType()->isDoubleTy())
- Name = "ldexp";
- else
- Name = "ldexpl";
-
- Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(),
- B.getInt32Ty(), NULL);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
+ if (LdExpArg) {
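+      // exp2(n) == ldexp(1.0, n), so pass a 1.0 constant (extended to the
+      // operand's type) as the first argument.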
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
- return CI;
+ Module *M = Caller->getParent();
+ Value *Callee =
+ M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
+ Op->getType(), B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
}
return Ret;
}
@@ -1261,7 +1339,8 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
struct SinCosPiOpt : public LibCallOptimization {
SinCosPiOpt() {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Make sure the prototype is as expected; otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
@@ -1277,9 +1356,8 @@ struct SinCosPiOpt : public LibCallOptimization {
// Look for all compatible sinpi, cospi and sincospi calls with the same
// argument. If there are enough (in some sense) we can make the
// substitution.
- for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ++UI)
- classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ for (User *U : Arg->users())
+ classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls,
SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
@@ -1334,7 +1412,7 @@ struct SinCosPiOpt : public LibCallOptimization {
SinCalls.push_back(CI);
else if (Func == LibFunc::cospif)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospi_stretf)
+ else if (Func == LibFunc::sincospif_stret)
SinCosCalls.push_back(CI);
} else {
if (Func == LibFunc::sinpi)
@@ -1363,7 +1441,7 @@ struct SinCosPiOpt : public LibCallOptimization {
Triple T(OrigCallee->getParent()->getTargetTriple());
if (UseFloat) {
- Name = "__sincospi_stretf";
+ Name = "__sincospif_stret";
assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
// x86_64 can't use {float, float} since that would be returned in both
@@ -1412,7 +1490,8 @@ struct SinCosPiOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
struct FFSOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has a single integer argument and an integer result.
@@ -1445,8 +1524,9 @@ struct FFSOpt : public LibCallOptimization {
};
struct AbsOpt : public LibCallOptimization {
- virtual bool ignoreCallingConv() { return true; }
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ bool ignoreCallingConv() override { return true; }
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1463,7 +1543,8 @@ struct AbsOpt : public LibCallOptimization {
};
struct IsDigitOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1479,7 +1560,8 @@ struct IsDigitOpt : public LibCallOptimization {
};
struct IsAsciiOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1494,7 +1576,8 @@ struct IsAsciiOpt : public LibCallOptimization {
};
struct ToAsciiOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
@@ -1514,7 +1597,8 @@ struct ToAsciiOpt : public LibCallOptimization {
struct ErrorReportingOpt : public LibCallOptimization {
ErrorReportingOpt(int S = -1) : StreamArg(S) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &) override {
// Error reporting calls should be cold; mark them as such.
// This applies even to non-builtin calls: it is only a hint and applies to
// functions that the frontend might not understand as builtins.
@@ -1580,7 +1664,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1592,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ Value *NewCI = EmitPutS(GV, B, DL, TLI);
return (CI->use_empty() || !NewCI) ?
NewCI :
ConstantInt::get(CI->getType(), FormatStr.size()+1);
@@ -1602,7 +1686,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1611,12 +1695,13 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+ return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
}
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require one fixed pointer argument and an integer/void result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
@@ -1660,11 +1745,11 @@ struct SPrintFOpt : public LibCallOptimization {
return 0; // We found a format specifier; bail out.
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ ConstantInt::get(DL->getIntPtrType(*Context), // Copy the
FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1690,12 +1775,12 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 's') {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
return 0;
Value *IncLen = B.CreateAdd(Len,
@@ -1709,7 +1794,8 @@ struct SPrintFOpt : public LibCallOptimization {
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require two fixed pointer arguments and an integer result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1760,12 +1846,12 @@ struct FPrintFOpt : public LibCallOptimization {
return 0; // We found a format specifier.
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(DL->getIntPtrType(*Context),
FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
+ CI->getArgOperand(0), B, DL, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1778,19 +1864,20 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require two fixed parameters as pointers and an integer result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1818,7 +1905,8 @@ struct FPrintFOpt : public LibCallOptimization {
};
struct FWriteOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
ErrorReportingOpt ER(/* StreamArg = */ 3);
(void) ER.callOptimizer(Callee, CI, B);
@@ -1845,7 +1933,7 @@ struct FWriteOpt : public LibCallOptimization {
// This optimization is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
@@ -1854,12 +1942,13 @@ struct FWriteOpt : public LibCallOptimization {
};
struct FPutsOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
ErrorReportingOpt ER(/* StreamArg = */ 1);
(void) ER.callOptimizer(Callee, CI, B);
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// Require two pointers. Also, we can't optimize if the return value is used.
FunctionType *FT = Callee->getFunctionType();
@@ -1873,13 +1962,14 @@ struct FPutsOpt : public LibCallOptimization {
if (!Len) return 0;
// Known to have no uses (see above).
return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, DL, TLI);
}
};
struct PutsOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require one fixed pointer argument and an integer/void result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
@@ -1894,7 +1984,7 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1908,7 +1998,7 @@ struct PutsOpt : public LibCallOptimization {
namespace llvm {
class LibCallSimplifierImpl {
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
bool UnsafeFPShrink;
@@ -1918,11 +2008,11 @@ class LibCallSimplifierImpl {
PowOpt Pow;
Exp2Opt Exp2;
public:
- LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ LibCallSimplifierImpl(const DataLayout *DL, const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS,
bool UnsafeFPShrink = false)
: Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
- this->TD = TD;
+ this->DL = DL;
this->TLI = TLI;
this->LCS = LCS;
this->UnsafeFPShrink = UnsafeFPShrink;
@@ -1975,6 +2065,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static BinaryDoubleFPOpt BinaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
static SinCosPiOpt SinCosPi;
@@ -2144,6 +2235,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return &UnsafeUnaryDoubleFP;
return 0;
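+  // fmin/fmax have float variants, so they are candidates for the binary
+  // double -> float shrinking optimization.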
+ case LibFunc::fmin:
+ case LibFunc::fmax:
+ if (hasFloatVersion(FuncName))
+ return &BinaryDoubleFP;
+ return 0;
case LibFunc::memcpy_chk:
return &MemCpyChk;
default:
@@ -2175,15 +2271,15 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
LibCallOptimization *LCO = lookupOptimization(CI);
if (LCO) {
IRBuilder<> Builder(CI);
- return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
+ return LCO->optimizeCall(CI, DL, TLI, LCS, Builder);
}
return 0;
}
-LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool UnsafeFPShrink) {
- Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
+ Impl = new LibCallSimplifierImpl(DL, TLI, this, UnsafeFPShrink);
}
LibCallSimplifier::~LibCallSimplifier() {
@@ -2242,8 +2338,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// strchr:
-// * strchr(p, 0) -> strlen(p)
// tan, tanf, tanl:
// * tan(atan(x)) -> x
//
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index 2ef692c..c318560 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -15,9 +15,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SpecialCaseList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/DerivedTypes.h"
@@ -55,7 +54,7 @@ SpecialCaseList *SpecialCaseList::create(
const StringRef Path, std::string &Error) {
if (Path.empty())
return new SpecialCaseList();
- OwningPtr<MemoryBuffer> File;
+ std::unique_ptr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
return 0;
@@ -65,10 +64,10 @@ SpecialCaseList *SpecialCaseList::create(
SpecialCaseList *SpecialCaseList::create(
const MemoryBuffer *MB, std::string &Error) {
- OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
return 0;
- return SCL.take();
+ return SCL.release();
}
SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index c3df215..ed4f45c 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeAddDiscriminatorsPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAPass(Registry);