author    Stephen Hines <srhines@google.com>  2014-12-01 14:51:49 -0800
committer Stephen Hines <srhines@google.com>  2014-12-02 16:08:10 -0800
commit    37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree      8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib/Transforms/IPO
parent    d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp        129
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp              2
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp   28
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp                 10
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp             30
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                 51
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                109
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp               4
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp               4
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                   20
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp                4
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp            91
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp       210
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp              27
14 files changed, 497 insertions, 222 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index f9de54a..c4706e8 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -78,11 +78,15 @@ namespace {
const DataLayout *DL;
private:
+ bool isDenselyPacked(Type *type);
+ bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
CallGraphNode *DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
- SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+
+ using llvm::Pass::doInitialization;
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
@@ -123,6 +127,78 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
+/// \brief Checks if a type could have padding bytes.
+bool ArgPromotion::isDenselyPacked(Type *type) {
+
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
+
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ return false;
+
+ if (!isa<CompositeType>(type))
+ return true;
+
+  // For homogeneous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL->getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ }
+
+ return true;
+}
+
+/// \brief Checks if the padding bytes of an argument could be accessed.
+bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
+
+ assert(arg->hasByValAttr());
+
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
+
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
+ }
+ }
+
+  // Check to make sure the pointers aren't captured.
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
+
+ return false;
+}
+
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
@@ -154,6 +230,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -163,9 +246,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe. This does not apply to
- // inalloca.
- if (PtrArg->hasByValAttr()) {
+ // pass the elements, which is always safe, if the passed value is densely
+ // packed or if we can prove the padding bytes are never accessed. This does
+ // not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
@@ -443,7 +530,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// of elements of the aggregate.
return false;
}
- ToPromote.insert(Operands);
+ ToPromote.insert(std::move(Operands));
}
}
@@ -475,10 +562,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// loading block.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
- for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
- I = idf_ext_begin(P, TranspBlocks),
- E = idf_ext_end(P, TranspBlocks); I != E; ++I)
- if (AA.canBasicBlockModify(**I, Loc))
+ for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+ if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
@@ -493,8 +578,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
- SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
@@ -615,9 +700,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
-
+ if (DI != FunctionDIs.end()) {
+ DISubprogram SP = DI->second;
+ SP.replaceFunction(NF);
+ // Ensure the map is updated so it can be reused on subsequent argument
+ // promotions of the same function.
+ FunctionDIs.erase(DI);
+ FunctionDIs[NF] = SP;
+ }
+
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -716,9 +807,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// of the previous load.
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
newLoad->setAlignment(OrigLoad->getAlignment());
- // Transfer the TBAA info too.
- newLoad->setMetadata(LLVMContext::MD_tbaa,
- OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
+
Args.push_back(newLoad);
AA.copyValue(OrigLoad, Args.back());
}
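
Note: the dense-packing test added above is what gates the relaxed byval promotion in this file. A minimal standalone illustration (hypothetical, not part of the commit) of a type that fails it:

    #include <cstdint>

    // On common 64-bit ABIs this struct allocates 8 bytes but only 5 carry
    // data, so a check like isDenselyPacked() would reject it and promotion
    // would additionally have to prove the padding is never accessed
    // (canPaddingBeAccessed() returning false).
    struct Padded {
      int32_t a; // bytes 0-3
      int8_t  b; // byte 4; bytes 5-7 are padding
    };
    static_assert(sizeof(Padded) > 5, "padding bytes exist on typical ABIs");

    // A densely packed counterpart: all 8 allocated bytes carry data.
    struct Dense {
      int32_t a;
      int32_t b;
    };
    static_assert(sizeof(Dense) == 2 * sizeof(int32_t), "no padding");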
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 23be081..0b6ade9 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -66,7 +66,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index ac3853d..4045c09 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -199,10 +199,15 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
return false;
// Okay, we know we can transform this function if safe. Scan its body
- // looking for calls to llvm.vastart.
+ // looking for calls marked musttail or calls to llvm.vastart.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->isMustTailCall())
+ return false;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
if (II->getIntrinsicID() == Intrinsic::vastart)
return false;
}
@@ -297,8 +302,14 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&Fn);
- if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ if (DI != FunctionDIs.end()) {
+ DISubprogram SP = DI->second;
+ SP.replaceFunction(NF);
+ // Ensure the map is updated so it can be reused on non-varargs argument
+ // eliminations of the same function.
+ FunctionDIs.erase(DI);
+ FunctionDIs[NF] = SP;
+ }
// Fix up any BlockAddresses that refer to the function.
Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
@@ -1088,8 +1099,8 @@ bool DAE::runOnModule(Module &M) {
// determine that dead arguments passed into recursive functions are dead).
//
DEBUG(dbgs() << "DAE - Determining liveness\n");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- SurveyFunction(*I);
+ for (auto &F : M)
+ SurveyFunction(F);
// Now, remove all dead arguments and return values from each function in
// turn.
@@ -1102,11 +1113,8 @@ bool DAE::runOnModule(Module &M) {
// Finally, look for any unused parameters in functions with non-local
// linkage and replace the passed in parameters with undef.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function& F = *I;
-
+ for (auto &F : M)
Changed |= RemoveDeadArgumentsFromCallers(F);
- }
return Changed;
}
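
Note: the musttail bailout above exists because a musttail call must forward the caller's varargs unchanged, so deleting the variadic part of the signature would break it. A hedged distillation of the new scan, sketched against the LLVM C++ API used elsewhere in this diff:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/IntrinsicInst.h"

    // Sketch: vararg elimination must bail out if the function either
    // materializes its varargs (llvm.vastart) or forwards them via a
    // musttail call; both depend on the original signature.
    static bool blocksVarArgRemoval(llvm::Function &Fn) {
      for (llvm::BasicBlock &BB : Fn)
        for (llvm::Instruction &I : BB) {
          auto *CI = llvm::dyn_cast<llvm::CallInst>(&I);
          if (!CI)
            continue;
          if (CI->isMustTailCall())
            return true;
          if (auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(CI))
            if (II->getIntrinsicID() == llvm::Intrinsic::vastart)
              return true;
        }
      return false;
    }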
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 40ec9fa..2f8c7d9 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -91,7 +91,7 @@ namespace {
continue;
}
- makeVisible(*I, Delete);
+ makeVisible(*I, Delete);
if (Delete)
I->setInitializer(nullptr);
@@ -106,7 +106,7 @@ namespace {
continue;
}
- makeVisible(*I, Delete);
+ makeVisible(*I, Delete);
if (Delete)
I->deleteBody();
@@ -118,8 +118,8 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- bool Delete = deleteStuff == (bool)Named.count(CurI);
- makeVisible(*CurI, Delete);
+ bool Delete = deleteStuff == (bool)Named.count(CurI);
+ makeVisible(*CurI, Delete);
if (Delete) {
Type *Ty = CurI->getType()->getElementType();
@@ -148,7 +148,7 @@ namespace {
char GVExtractorPass::ID = 0;
}
-ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
bool deleteFn) {
return new GVExtractorPass(GVs, deleteFn);
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 8174df9..823ae53 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -161,8 +161,9 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - may write memory. Just give up.
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or node we don't want to optimize - assume it may write
+ // memory and give up.
return false;
AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
@@ -204,9 +205,11 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
CI != CE; ++CI) {
Value *Arg = *CI;
if (Arg->getType()->isPointerTy()) {
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
AliasAnalysis::Location Loc(Arg,
- AliasAnalysis::UnknownSize,
- I->getMetadata(LLVMContext::MD_tbaa));
+ AliasAnalysis::UnknownSize, AAInfo);
if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
if (MRB & AliasAnalysis::Mod)
// Writes non-local memory. Give up.
@@ -443,7 +446,7 @@ determinePointerReadAttrs(Argument *A,
case Instruction::AddrSpaceCast:
// The original value is not read/written via this if the new value isn't.
for (Use &UU : I->uses())
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
break;
@@ -457,7 +460,7 @@ determinePointerReadAttrs(Argument *A,
auto AddUsersToWorklistIfCapturing = [&] {
if (Captures)
for (Use &UU : I->uses())
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
};
@@ -525,7 +528,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// looking up whether a given CallGraphNode is in this SCC.
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F && !F->isDeclaration() && !F->mayBeOverridden())
+ if (F && !F->isDeclaration() && !F->mayBeOverridden() &&
+ !F->hasFnAttribute(Attribute::OptimizeNone))
SCCNodes.insert(F);
}
@@ -539,8 +543,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - only a problem for arguments that we pass to it.
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or function we're trying not to optimize - only a problem
+ // for arguments that we pass to it.
continue;
// Definitions with weak linkage may be overridden at linktime with
@@ -792,8 +797,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - skip it;
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or node we don't want to optimize - skip it;
return false;
// Already noalias.
@@ -832,6 +837,9 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
/// given function and set any applicable attributes. Returns true
/// if any attributes were set and false otherwise.
bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
FunctionType *FTy = F.getFunctionType();
LibFunc::Func TheLibFunc;
if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
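
Note: the recurring OptimizeNone guard added throughout this file can be read as a single predicate (hypothetical distillation, not code from the commit):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Sketch: treat optnone functions like external nodes. No attributes are
    // inferred for them and nothing is propagated through them, so callers
    // cannot rely on facts an unoptimized body might not uphold.
    static bool skipForAttributeInference(const llvm::Function *F) {
      return !F || F->hasFnAttribute(llvm::Attribute::OptimizeNone);
    }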
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 7e7a4c0..705e929 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -77,9 +78,6 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
- typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy;
- ComdatGVPairsTy ComdatGVPairs;
-
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -87,8 +85,6 @@ bool GlobalDCE::runOnModule(Module &M) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
if (!I->isDiscardableIfUnused())
GlobalIsNeeded(I);
- else if (const Comdat *C = I->getComdat())
- ComdatGVPairs.insert(std::make_pair(C, I));
}
}
@@ -100,8 +96,6 @@ bool GlobalDCE::runOnModule(Module &M) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
if (!I->isDiscardableIfUnused())
GlobalIsNeeded(I);
- else if (const Comdat *C = I->getComdat())
- ComdatGVPairs.insert(std::make_pair(C, I));
}
}
@@ -111,24 +105,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Externally visible aliases are needed.
if (!I->isDiscardableIfUnused()) {
GlobalIsNeeded(I);
- } else if (const Comdat *C = I->getComdat()) {
- ComdatGVPairs.insert(std::make_pair(C, I));
- }
- }
-
- for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(),
- E = ComdatGVPairs.end();
- I != E;) {
- ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first);
- bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) {
- return Pair.second->isDiscardableIfUnused();
- });
- if (!CanDiscard) {
- std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) {
- GlobalIsNeeded(Pair.second);
- });
}
- I = UB;
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -141,7 +118,12 @@ bool GlobalDCE::runOnModule(Module &M) {
I != E; ++I)
if (!AliveGlobals.count(I)) {
DeadGlobalVars.push_back(I); // Keep track of dead globals
- I->setInitializer(nullptr);
+ if (I->hasInitializer()) {
+ Constant *Init = I->getInitializer();
+ I->setInitializer(nullptr);
+ if (isSafeToDestroyConstant(Init))
+ Init->destroyConstant();
+ }
}
// The second pass drops the bodies of functions which are dead...
@@ -203,9 +185,22 @@ bool GlobalDCE::runOnModule(Module &M) {
/// recursively mark anything that it uses as also needed.
void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// If the global is already in the set, no need to reprocess it.
- if (!AliveGlobals.insert(G))
+ if (!AliveGlobals.insert(G).second)
return;
-
+
+ Module *M = G->getParent();
+ if (Comdat *C = G->getComdat()) {
+ for (Function &F : *M)
+ if (F.getComdat() == C)
+ GlobalIsNeeded(&F);
+ for (GlobalVariable &GV : M->globals())
+ if (GV.getComdat() == C)
+ GlobalIsNeeded(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ if (GA.getComdat() == C)
+ GlobalIsNeeded(&GA);
+ }
+
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
// If this is a global variable, we must make sure to add any global values
// referenced by the initializer to the alive set.
@@ -243,7 +238,7 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
// If we've already processed this constant there's no need to do it again.
Constant *Op = dyn_cast<Constant>(*I);
- if (Op && SeenConstants.insert(Op))
+ if (Op && SeenConstants.insert(Op).second)
MarkUsedGlobalsAsNeeded(Op);
}
}
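
Note: the comdat walk added to GlobalIsNeeded encodes the linker's all-or-nothing rule for COMDAT groups. In isolation (illustrative helper; the names are not from the commit):

    #include "llvm/IR/Comdat.h"
    #include "llvm/IR/Module.h"

    // Sketch: once any member of a COMDAT group is known to be needed, every
    // member of the group must be kept, because the linker keeps or discards
    // a COMDAT as a unit.
    template <typename MarkFn>
    static void markComdatMembers(llvm::GlobalValue *G, MarkFn MarkNeeded) {
      const llvm::Comdat *C = G->getComdat();
      if (!C)
        return;
      llvm::Module *M = G->getParent();
      for (llvm::Function &F : *M)
        if (F.getComdat() == C)
          MarkNeeded(&F);
      for (llvm::GlobalVariable &GV : M->globals())
        if (GV.getComdat() == C)
          MarkNeeded(&GV);
      for (llvm::GlobalAlias &GA : M->aliases())
        if (GA.getComdat() == C)
          MarkNeeded(&GA);
    }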
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c1d0d3b..6e0ae83 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -88,6 +88,7 @@ namespace {
const DataLayout *DL;
TargetLibraryInfo *TLI;
+ SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
}
@@ -612,7 +613,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
- SmallPtrSet<const PHINode*, 8> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users())
if (isa<LoadInst>(U)) {
// Will trap.
@@ -638,7 +639,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// If we've already seen this phi node, ignore it, it has already been
// checked.
- if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
} else if (isa<ICmpInst>(U) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
@@ -957,7 +958,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
- SmallPtrSet<const PHINode*, 8> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users()) {
const Instruction *Inst = cast<Instruction>(U);
@@ -981,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
- if (PHIs.insert(PN))
+ if (PHIs.insert(PN).second)
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
return false;
continue;
@@ -1047,8 +1048,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
/// of a load) are simple enough to perform heap SRA on. This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
for (const User *U : V->users()) {
@@ -1072,11 +1073,11 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
}
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
- if (!LoadUsingPHIsPerLoad.insert(PN))
+ if (!LoadUsingPHIsPerLoad.insert(PN).second)
// This means some phi nodes are dependent on each other.
// Avoid infinite looping!
return false;
- if (!LoadUsingPHIs.insert(PN))
+ if (!LoadUsingPHIs.insert(PN).second)
// If we have already analyzed this PHI, then it is safe.
continue;
@@ -1115,9 +1116,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
// that all inputs the to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
- for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
- , E = LoadUsingPHIs.end(); I != E; ++I) {
- const PHINode *PN = *I;
+ for (const PHINode *PN : LoadUsingPHIs) {
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
@@ -1910,8 +1909,11 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
+
+ const Comdat *C = F->getComdat();
+ bool inComdat = C && NotDiscardableComdats.count(C);
F->removeDeadConstantUsers();
- if (F->isDefTriviallyDead()) {
+ if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
F->eraseFromParent();
Changed = true;
++NumFnDeleted;
@@ -1943,12 +1945,6 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool GlobalOpt::OptimizeGlobalVars(Module &M) {
bool Changed = false;
- SmallSet<const Comdat *, 8> NotDiscardableComdats;
- for (const GlobalVariable &GV : M.globals())
- if (const Comdat *C = GV.getComdat())
- if (!GV.isDiscardableIfUnused())
- NotDiscardableComdats.insert(C);
-
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = GVI++;
@@ -1965,7 +1961,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (GV->isDiscardableIfUnused()) {
if (const Comdat *C = GV->getComdat())
- if (NotDiscardableComdats.count(C))
+ if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage())
continue;
Changed |= ProcessGlobal(GV, GVI);
}
@@ -1975,7 +1971,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL);
@@ -1988,7 +1984,7 @@ isSimpleEnoughValueToCommit(Constant *C,
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
@@ -2046,10 +2042,11 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL) {
// If we already checked this constant, we win.
- if (!SimpleConstants.insert(C)) return true;
+ if (!SimpleConstants.insert(C).second)
+ return true;
// Check the constant.
return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
}
@@ -2217,7 +2214,7 @@ public:
return MutatedMemory;
}
- const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
+ const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const {
return Invariants;
}
@@ -2394,6 +2391,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
getVal(SI->getOperand(2)));
DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
<< "\n");
+ } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getExtractValue(
+ getVal(EVI->getAggregateOperand()), EVI->getIndices());
+ DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getInsertValue(
+ getVal(IVI->getAggregateOperand()),
+ getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
+ DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
+ << "\n");
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
@@ -2663,7 +2671,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
// Okay, we succeeded in evaluating this control flow. See if we have
// executed the new block before. If so, we have a looping function,
// which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NextBB))
+ if (!ExecutedBlocks.insert(NextBB).second)
return false; // looped!
// Okay, we have never been in this block before. Check to see if there
@@ -2700,10 +2708,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
I != E; ++I)
CommitValueTo(I->second, I->first);
- for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
- Eval.getInvariants().begin(), E = Eval.getInvariants().end();
- I != E; ++I)
- (*I)->setConstant(true);
+ for (GlobalVariable *GV : Eval.getInvariants())
+ GV->setConstant(true);
}
return EvalSuccess;
@@ -2714,7 +2720,7 @@ static int compareNames(Constant *const *A, Constant *const *B) {
}
static void setUsedInitializer(GlobalVariable &V,
- SmallPtrSet<GlobalValue *, 8> Init) {
+ const SmallPtrSet<GlobalValue *, 8> &Init) {
if (Init.empty()) {
V.eraseFromParent();
return;
@@ -2724,10 +2730,9 @@ static void setUsedInitializer(GlobalVariable &V,
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
SmallVector<llvm::Constant *, 8> UsedArray;
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
- I != E; ++I) {
+ for (GlobalValue *GV : Init) {
Constant *Cast
- = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
@@ -2758,18 +2763,27 @@ public:
CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
}
typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
+ typedef iterator_range<iterator> used_iterator_range;
iterator usedBegin() { return Used.begin(); }
iterator usedEnd() { return Used.end(); }
+ used_iterator_range used() {
+ return used_iterator_range(usedBegin(), usedEnd());
+ }
iterator compilerUsedBegin() { return CompilerUsed.begin(); }
iterator compilerUsedEnd() { return CompilerUsed.end(); }
+ used_iterator_range compilerUsed() {
+ return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
+ }
bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
bool compilerUsedCount(GlobalValue *GV) const {
return CompilerUsed.count(GV);
}
bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
- bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
- bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
+ bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
+ bool compilerUsedInsert(GlobalValue *GV) {
+ return CompilerUsed.insert(GV).second;
+ }
void syncVariablesAndSets() {
if (UsedV)
@@ -2814,7 +2828,8 @@ static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}
-static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
+static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
+ bool &RenameTarget) {
RenameTarget = false;
bool Ret = false;
if (hasUseOtherThanLLVMUsed(GA, U))
@@ -2849,10 +2864,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
LLVMUsed Used(M);
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
- E = Used.usedEnd();
- I != E; ++I)
- Used.compilerUsedErase(*I);
+ for (GlobalValue *GV : Used.used())
+ Used.compilerUsedErase(GV);
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
@@ -2963,7 +2976,7 @@ static bool cxxDtorIsEmpty(const Function &Fn,
SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
// Don't treat recursive functions as empty.
- if (!NewCalledFunctions.insert(CalledFn))
+ if (!NewCalledFunctions.insert(CalledFn).second)
return false;
if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
@@ -3035,6 +3048,20 @@ bool GlobalOpt::runOnModule(Module &M) {
while (LocalChange) {
LocalChange = false;
+ NotDiscardableComdats.clear();
+ for (const GlobalVariable &GV : M.globals())
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.isDiscardableIfUnused() || !GV.use_empty())
+ NotDiscardableComdats.insert(C);
+ for (Function &F : M)
+ if (const Comdat *C = F.getComdat())
+ if (!F.isDefTriviallyDead())
+ NotDiscardableComdats.insert(C);
+ for (GlobalAlias &GA : M.aliases())
+ if (const Comdat *C = GA.getComdat())
+ if (!GA.isDiscardableIfUnused() || !GA.use_empty())
+ NotDiscardableComdats.insert(C);
+
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M);
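
Note: NotDiscardableComdats is now a member recomputed at the top of each iteration, so OptimizeFunctions and OptimizeGlobalVars apply one shared rule. Roughly (hypothetical helper; the local-linkage exception matches the hunks above):

    #include "llvm/ADT/SmallSet.h"
    #include "llvm/IR/Comdat.h"
    #include "llvm/IR/GlobalValue.h"

    // Sketch: a discardable global inside a COMDAT may only be dropped when
    // the whole group is discardable -- unless it has local linkage, since
    // local symbols take no part in COMDAT resolution at link time.
    static bool canDiscard(
        const llvm::GlobalValue &GV,
        const llvm::SmallSet<const llvm::Comdat *, 8> &NotDiscardableComdats) {
      if (!GV.isDiscardableIfUnused())
        return false;
      if (const llvm::Comdat *C = GV.getComdat())
        if (NotDiscardableComdats.count(C) && !GV.hasLocalLinkage())
          return false;
      return true;
    }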
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 624cb90..819b2e0 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -14,6 +14,8 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -65,6 +67,8 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index d189756..d9a2b9e 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -73,6 +75,8 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9087ab2..3abe7a8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -16,6 +16,8 @@
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -74,6 +76,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionTracker>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -215,7 +219,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// If the inlined function already uses this alloca then we can't reuse
// it.
- if (!UsedAllocas.insert(AvailableAlloca))
+ if (!UsedAllocas.insert(AvailableAlloca).second)
continue;
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
@@ -357,8 +361,7 @@ bool Inliner::shouldInline(CallSite CS) {
// FIXME: All of this logic should be sunk into getInlineCost. It relies on
// the internal implementation of the inline cost metrics rather than
// treating them as truly abstract units etc.
- if (Caller->hasLocalLinkage() ||
- Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
+ if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
int TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
@@ -440,9 +443,11 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -501,7 +506,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL);
+ InlineFunctionInfo InlineInfo(&CG, DL, AA, AT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -664,6 +669,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
if (!F->isDefTriviallyDead())
continue;
+
+ // It is unsafe to drop a function with discardable linkage from a COMDAT
+ // without also dropping the other members of the COMDAT.
+ // The inliner doesn't visit non-function entities which are in COMDAT
+ // groups so it is unsafe to do so *unless* the linkage is local.
+ if (!F->hasLocalLinkage() && F->hasComdat())
+ continue;
// Remove any call graph edges from the function to its callees.
CGN->removeAllCalledFunctions();
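
Note: the COMDAT guard in removeDeadFunctions mirrors the GlobalDCE rule above; the inliner never visits non-function COMDAT members, so it must not strand them. As a predicate (illustrative, not commit code):

    #include "llvm/IR/Function.h"

    // Sketch: a trivially dead function may be erased by the inliner only if
    // it is local or outside any COMDAT group; otherwise dropping it could
    // orphan group members the inliner never sees.
    static bool inlinerMayErase(const llvm::Function *F) {
      return F->isDefTriviallyDead() &&
             (F->hasLocalLinkage() || !F->hasComdat());
    }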
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index c970a1a..7950163 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -148,9 +148,7 @@ bool InternalizePass::runOnModule(Module &M) {
// we don't see references from function local inline assembly. To be
// conservative, we internalize symbols in llvm.compiler.used, but we
// keep llvm.compiler.used so that the symbol is not deleted by llvm.
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.begin(), E = Used.end();
- I != E; ++I) {
- GlobalValue *V = *I;
+ for (GlobalValue *V : Used) {
ExternalNames.insert(V->getName());
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 559ef0b..b91ebf2 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -286,7 +286,7 @@ private:
/// 6.4.Load: range metadata (as integer numbers)
/// At this stage it's better to see the code, since it's no more than 10-15
/// lines per instruction, and it may change over time.
- int cmpOperation(const Instruction *L, const Instruction *R) const;
+ int cmpOperations(const Instruction *L, const Instruction *R) const;
/// Compare two GEPs for equivalent pointer arithmetic.
/// Parts to be compared for each comparison stage,
@@ -297,9 +297,9 @@ private:
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
- int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
- int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
- return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
}
/// cmpTypes - compares two types,
@@ -342,12 +342,12 @@ private:
/// be checked in the same way. If we get Res != 0 at some stage, return it.
/// Otherwise return 0.
/// 6. All other cases are llvm_unreachable.
- int cmpType(Type *TyL, Type *TyR) const;
+ int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
- int cmpAPInt(const APInt &L, const APInt &R) const;
- int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+ int cmpAPInts(const APInt &L, const APInt &R) const;
+ int cmpAPFloats(const APFloat &L, const APFloat &R) const;
int cmpStrings(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
@@ -392,15 +392,15 @@ private:
DenseMap<const Value*, int> sn_mapL, sn_mapR;
};
-class FunctionPtr {
+class FunctionNode {
AssertingVH<Function> F;
const DataLayout *DL;
public:
- FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
- bool operator<(const FunctionPtr &RHS) const {
+ bool operator<(const FunctionNode &RHS) const {
return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
}
};
@@ -412,7 +412,7 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
-int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
return Res;
if (L.ugt(R)) return 1;
@@ -420,11 +420,11 @@ int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
return 0;
}
-int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
(uint64_t)&R.getSemantics()))
return Res;
- return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
@@ -474,7 +474,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
// Check whether types are bitcastable. This part is just re-factored
// Type::canLosslesslyBitCastTo method, but instead of returning true/false,
// we also pack into result which type is "less" for us.
- int TypesRes = cmpType(TyL, TyR);
+ int TypesRes = cmpTypes(TyL, TyR);
if (TypesRes != 0) {
// Types are different, but check whether we can bitcast them.
if (!TyL->isFirstClassType()) {
@@ -541,12 +541,12 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
case Value::ConstantIntVal: {
const APInt &LInt = cast<ConstantInt>(L)->getValue();
const APInt &RInt = cast<ConstantInt>(R)->getValue();
- return cmpAPInt(LInt, RInt);
+ return cmpAPInts(LInt, RInt);
}
case Value::ConstantFPVal: {
const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
- return cmpAPFloat(LAPF, RAPF);
+ return cmpAPFloats(LAPF, RAPF);
}
case Value::ConstantArrayVal: {
const ConstantArray *LA = cast<ConstantArray>(L);
@@ -615,7 +615,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
/// cmpTypes - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
-int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
@@ -665,8 +665,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
return cmpNumbers(STyL->isPacked(), STyR->isPacked());
for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
- if (int Res = cmpType(STyL->getElementType(i),
- STyR->getElementType(i)))
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
return Res;
}
return 0;
@@ -681,11 +680,11 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
if (FTyL->isVarArg() != FTyR->isVarArg())
return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
- if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
return Res;
for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
- if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
return Res;
}
return 0;
@@ -696,7 +695,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
ArrayType *ATyR = cast<ArrayType>(TyR);
if (ATyL->getNumElements() != ATyR->getNumElements())
return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
- return cmpType(ATyL->getElementType(), ATyR->getElementType());
+ return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
}
}
}
@@ -705,8 +704,8 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
-int FunctionComparator::cmpOperation(const Instruction *L,
- const Instruction *R) const {
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
// * replace type comparison with calls to isEquivalentType.
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
@@ -717,7 +716,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
return Res;
- if (int Res = cmpType(L->getType(), R->getType()))
+ if (int Res = cmpTypes(L->getType(), R->getType()))
return Res;
if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
@@ -728,7 +727,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
if (int Res =
- cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
return Res;
}
@@ -766,13 +765,23 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<CallInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -835,7 +844,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
-int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
const GEPOperator *GEPR) {
unsigned int ASL = GEPL->getPointerAddressSpace();
@@ -851,7 +860,7 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInt(OffsetL, OffsetR);
+ return cmpAPInts(OffsetL, OffsetR);
}
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
@@ -935,10 +944,10 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res =
cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
return Res;
- if (int Res = cmpGEP(GEPL, GEPR))
+ if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperation(InstL, InstR))
+ if (int Res = cmpOperations(InstL, InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
@@ -950,7 +959,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
return Res;
// TODO: Already checked in cmpOperations
- if (int Res = cmpType(OpL->getType(), OpR->getType()))
+ if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
return Res;
}
}
@@ -998,7 +1007,7 @@ int FunctionComparator::compare() {
if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
return Res;
- if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType()))
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
return Res;
assert(FnL->arg_size() == FnR->arg_size() &&
@@ -1040,7 +1049,7 @@ int FunctionComparator::compare() {
assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(TermL->getSuccessor(i)))
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
continue;
FnLBBs.push_back(TermL->getSuccessor(i));
@@ -1068,7 +1077,7 @@ public:
bool runOnModule(Module &M) override;
private:
- typedef std::set<FunctionPtr> FnTreeType;
+ typedef std::set<FunctionNode> FnTreeType;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
@@ -1291,11 +1300,11 @@ static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
Value *Result = UndefValue::get(DestTy);
for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
Value *Element = createCast(
- Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+ Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
DestTy->getStructElementType(I));
Result =
- Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+ Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
}
return Result;
}
@@ -1411,14 +1420,14 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionPtr(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction, DL));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
- const FunctionPtr &OldF = *Result.first;
+ const FunctionNode &OldF = *Result.first;
// Don't merge tiny functions, since it can just end up making the function
// larger.
@@ -1448,7 +1457,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
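
Note: the FunctionPtr -> FunctionNode rename makes the data structure explicit: FnTree is a std::set ordered by FunctionComparator::compare(), which returns -1/0/1 like strcmp, so equivalent functions collide on one node. A hedged usage sketch (assumes the FunctionNode class from the hunk above):

    #include <set>
    #include "llvm/IR/Function.h"

    // Sketch: a failed insert means an equivalent function already exists in
    // the tree -- exactly the merge candidate MergeFunctions::insert() looks
    // for.
    bool tryInsert(std::set<FunctionNode> &FnTree, llvm::Function *NewF,
                   const llvm::DataLayout *DL) {
      auto Result = FnTree.insert(FunctionNode(NewF, DL));
      if (Result.second)
        return false; // unique so far; nothing to merge
      llvm::Function *Existing = Result.first->getFunc();
      (void)Existing; // merge NewF into (or redirect it to) Existing here
      return true;
    }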
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 46a3187..da85a91 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -17,11 +17,14 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -45,6 +48,10 @@ UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+static cl::opt<bool> ExtraVectorizerPasses(
+ "extra-vectorizer-passes", cl::init(false), cl::Hidden,
+ cl::desc("Run cleanup optimization passes after vectorization."));
+
static cl::opt<bool> UseNewSROA("use-new-sroa",
cl::init(true), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
@@ -57,6 +64,20 @@ static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
+static cl::opt<bool>
+RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
+ cl::init(true), cl::Hidden,
+ cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
+ "vectorizer instead of before"));
+
+static cl::opt<bool> UseCFLAA("use-cfl-aa",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis"));
+
+static cl::opt<bool>
+EnableMLSM("mlsm", cl::init(true), cl::Hidden,
+ cl::desc("Enable motion of merged load and store"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -70,6 +91,11 @@ PassManagerBuilder::PassManagerBuilder() {
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
LoadCombine = RunLoadCombine;
+ DisableGVNLoadPRE = false;
+ VerifyInput = false;
+ VerifyOutput = false;
+ StripDebug = false;
+ MergeFunctions = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -106,7 +132,10 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
+ if (UseCFLAA)
+ PM.add(createCFLAliasAnalysisPass());
PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createScopedNoAliasAAPass());
PM.add(createBasicAliasAnalysisPass());
}
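
Note: in the legacy pass manager each newly added alias analysis is consulted before the ones added earlier, which is why BasicAA stays authoritative in the ordering above. A hedged standalone reconstruction of the stack this hunk builds:

    #include "llvm/Analysis/Passes.h"
    #include "llvm/PassManager.h"

    // Sketch: later additions override earlier ones on disagreement, so
    // BasicAA still wins over the metadata-based analyses.
    void addAAStack(llvm::PassManagerBase &PM, bool UseCFLAA) {
      if (UseCFLAA) // experimental, off by default per the new flag above
        PM.add(llvm::createCFLAliasAnalysisPass());
      PM.add(llvm::createTypeBasedAliasAnalysisPass());
      PM.add(llvm::createScopedNoAliasAAPass()); // new scoped-noalias metadata AA
      PM.add(llvm::createBasicAliasAnalysisPass());
    }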
@@ -130,18 +159,22 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
}
void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
- // If all optimizations are disabled, just run the always-inline pass.
+ // If all optimizations are disabled, just run the always-inline pass and,
+ // if enabled, the function merging pass.
if (OptLevel == 0) {
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
}
- // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
- // pass manager, but we don't want to add extensions into that pass manager.
- // To prevent this we must insert a no-op module pass to reset the pass
- // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
- if (!GlobalExtensions->empty() || !Extensions.empty())
+ // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+ // creates a CGSCC pass manager, but we don't want to add extensions into
+ // that pass manager. To prevent this we insert a no-op module pass to reset
+ // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
+  // builds. The function merging pass, being a module pass, doubles as that
+  // barrier when it is enabled.
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+ else if (!GlobalExtensions->empty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
@@ -207,8 +240,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
- if (OptLevel > 1)
- MPM.add(createGVNPass()); // Remove redundancies
+ if (OptLevel > 1) {
+ if (EnableMLSM)
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ }
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
@@ -224,21 +260,23 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ if (!RunSLPAfterLoopVectorization) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
}
if (LoadCombine)
@@ -253,6 +291,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+
+  // Re-rotate loops in all our loop nests. These may have fallen out of
+ // rotated form due to GVN or other transformations, and the vectorizer relies
+ // on the rotated form.
+ if (ExtraVectorizerPasses)
+ MPM.add(createLoopRotatePass());
+
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
@@ -260,12 +305,56 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ MPM.add(createEarlyCSEPass());
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createLICMPass());
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+ }
+
+ if (RunSLPAfterLoopVectorization) {
+ if (SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
+ }
+ }
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+ }
+
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
+  // After vectorization and unrolling, llvm.assume intrinsics may tell us
+  // more about pointer alignments.
+ MPM.add(createAlignmentFromAssumptionsPass());
+
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
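The ExtraVectorizerPasses cleanup sequence above (EarlyCSE, correlated-value propagation, InstCombine, LICM, loop unswitch, CFG simplification, InstCombine again) is opt-in; a sketch of the gating flag (declaration assumed):

static llvm::cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", llvm::cl::init(false), llvm::cl::Hidden,
    llvm::cl::desc("Run cleanup optimization passes after vectorization."));

Note the unswitch call: createLoopUnswitchPass takes an optimize-for-size flag, so SizeLevel || OptLevel < 3 keeps unswitching conservative everywhere except full -O3.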
@@ -277,22 +366,17 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createConstantMergePass()); // Merge dup global constants
}
}
+
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- bool Internalize,
- bool RunInliner,
- bool DisableGVNLoadPRE) {
+void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
- // Now that composite has been compiled, scan through the module, looking
- // for a main function. If main is defined, mark all other functions
- // internal.
- if (Internalize)
- PM.add(createInternalizePass("main"));
-
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
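Since internalization no longer happens inside the builder, an LTO client that wants the old mark-everything-but-main-internal behavior now adds the pass itself before populating; a minimal sketch (createInternalizePass per llvm/Transforms/IPO.h, taking the symbols to keep exported):

#include "llvm/Transforms/IPO.h"
PM.add(llvm::createInternalizePass({"main"})); // keep main, internalize the rest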
@@ -316,8 +400,11 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
- if (RunInliner)
- PM.add(createFunctionInliningPass());
+  bool RunInliner = Inliner != nullptr;
+ if (RunInliner) {
+ PM.add(Inliner);
+ Inliner = nullptr;
+ }
PM.add(createPruneEHPass()); // Remove dead EH info.
@@ -346,6 +433,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalsModRefPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
+ if (EnableMLSM)
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -355,10 +444,16 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
- PM.add(createLoopVectorizePass(true, true));
+ PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (RunSLPAfterLoopVectorization)
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+  // After vectorization, llvm.assume intrinsics may tell us more about
+  // pointer alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
if (LoadCombine)
PM.add(createLoadCombinePass());
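The two booleans in the createLoopVectorizePass call above map onto its factory signature (parameter names assumed from headers of this era), so LTO now disables the vectorizer's internal unrolling and honors the builder's LoopVectorize setting instead of unconditionally vectorizing:

// Pass *createLoopVectorizePass(bool NoUnrolling = false,
//                               bool AlwaysVectorize = true);
PM.add(llvm::createLoopVectorizePass(/*NoUnrolling=*/true,
                                     /*AlwaysVectorize=*/LoopVectorize));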
@@ -374,6 +469,39 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
+
+  // FIXME: It would be profitable (in compile time) to run this at -O0 too,
+  // but currently it damages debug info.
+ if (MergeFunctions)
+ PM.add(createMergeFunctionsPass());
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ TargetMachine *TM) {
+ if (TM) {
+ PM.add(new DataLayoutPass());
+ TM->addAnalysisPasses(PM);
+ }
+
+ if (LibraryInfo)
+ PM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (VerifyInput)
+ PM.add(createVerifierPass());
+
+ if (StripDebug)
+ PM.add(createStripSymbolsPass(true));
+
+ if (VerifyInput)
+ PM.add(createDebugInfoVerifierPass());
+
+ if (OptLevel != 0)
+ addLTOOptimizationPasses(PM);
+
+ if (VerifyOutput) {
+ PM.add(createVerifierPass());
+ PM.add(createDebugInfoVerifierPass());
+ }
}
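Putting the new entry point together, a linker plugin would drive LTO roughly like this (VerifyInput, VerifyOutput, and StripDebug are the builder members consulted above; TM may be null):

llvm::PassManagerBuilder PMB;
PMB.OptLevel = 2;
PMB.VerifyInput = true;   // IR and debug-info verifiers before optimizing
PMB.VerifyOutput = true;  // and again afterwards
PMB.Inliner = llvm::createFunctionInliningPass();
llvm::PassManager PM;
PMB.populateLTOPassManager(PM, TM); // TM, if given, adds DataLayout and TTI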
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -457,5 +585,11 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
+
+ // A small backwards compatibility hack. populateLTOPassManager used to take
+  // a RunInliner option.
+ if (RunInliner && !Builder->Inliner)
+ Builder->Inliner = createFunctionInliningPass();
+
+ Builder->populateLTOPassManager(*LPM);
}
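Seen from the C API, the old four-argument call keeps working; a sketch (Internalize appears to be ignored after this change, while RunInliner != 0 seeds Builder->Inliner as shown above):

LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
LLVMPassManagerBuilderSetOptLevel(PMB, 2);
LLVMPassManagerRef PM = LLVMCreatePassManager();
LLVMPassManagerBuilderPopulateLTOPassManager(PMB, PM, /*Internalize=*/0,
                                             /*RunInliner=*/1);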
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 1abbccc..3412b9e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -154,9 +154,8 @@ static void RemoveDeadConstant(Constant *C) {
C->destroyConstant();
// If the constant referenced anything, see if we can delete it as well.
- for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
- OE = Operands.end(); OI != OE; ++OI)
- RemoveDeadConstant(*OI);
+ for (Constant *O : Operands)
+ RemoveDeadConstant(O);
}
// Strip the symbol table of its names.
@@ -191,7 +190,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
/// Find values that are marked as llvm.used.
static void findUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
UsedValues.insert(LLVMUsed);
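Taking SmallPtrSetImpl<T> & instead of SmallPtrSet<T, N> & erases the inline capacity N from the signature, so each caller in this file can size its set independently; for example:

llvm::SmallPtrSet<const llvm::GlobalValue *, 8> Used;
findUsedValues(M.getGlobalVariable("llvm.used"), Used);
llvm::SmallPtrSet<const llvm::GlobalValue *, 32> CompilerUsed; // any N works
findUsedValues(M.getGlobalVariable("llvm.compiler.used"), CompilerUsed);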
@@ -350,28 +349,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- // Make sure that 9 is still the index of the subprograms. This is to make
- // sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this is updated if such an
- // event occurs.
- assert(DIC->getNumOperands() >= 10 &&
- SPs == DIC->getOperand(9) &&
- "DICompileUnits is expected to store Subprograms in operand "
- "9.");
- DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
Changed = true;
}
if (GlobalVariableChange) {
- // Make sure that 10 is still the index of global variables. This is to
- // make sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this index is updated if such
- // an event occurs.
- assert(DIC->getNumOperands() >= 11 &&
- GVs == DIC->getOperand(10) &&
- "DICompileUnits is expected to store Global Variables in operand "
- "10.");
- DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
Changed = true;
}
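For readers tracking the DebugInfo API, the replacement calls in isolation (CUNode and Ctx stand in for locals; the named accessors supersede the hard-coded operand indices 9 and 10 that the deleted asserts were guarding):

llvm::DICompileUnit CU(CUNode);
CU.replaceSubprograms(llvm::DIArray(llvm::MDNode::get(Ctx, LiveSubprograms)));
CU.replaceGlobalVariables(
    llvm::DIArray(llvm::MDNode::get(Ctx, LiveGlobalVariables)));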