author     Stephen Hines <srhines@google.com>    2014-12-01 14:51:49 -0800
committer  Stephen Hines <srhines@google.com>    2014-12-02 16:08:10 -0800
commit     37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree       8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib/Transforms/IPO
parent     d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
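The PassManagerBuilder hunks near the end of this diff add several new public knobs (`DisableGVNLoadPRE`, `VerifyInput`/`VerifyOutput`, `StripDebug`, `MergeFunctions`), all default-initialized in the constructor. A hedged sketch of how a client might drive the new fields, assuming the r222494-era legacy pass-manager API; `buildPipeline` is a hypothetical helper:

```cpp
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// Populates a legacy module pass pipeline at -O2. MergeFunctions and
// DisableGVNLoadPRE are fields this commit introduces; with OptLevel == 0,
// MergeFunctions would instead run right after the always-inliner.
static void buildPipeline(PassManager &MPM) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  PMB.SizeLevel = 0;
  PMB.MergeFunctions = true;     // new field, defaults to false
  PMB.DisableGVNLoadPRE = false; // new field, forwarded to createGVNPass()
  PMB.populateModulePassManager(MPM);
}
```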
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp        | 129
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp            |   2
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  |  28
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp                |  10
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp            |  30
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                |  51
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                | 109
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp             |   4
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp             |   4
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                  |  20
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp              |   4
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp           |  91
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp       | 210
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp             |  27
14 files changed, 497 insertions, 222 deletions
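Two mechanical updates recur throughout the hunks below: `SmallPtrSet::insert` now returns `std::pair<iterator, bool>` instead of `bool` (hence the many `.second` additions), and set parameters are taken as `SmallPtrSetImpl<T> &` so callers can pass a `SmallPtrSet` of any inline size. A minimal sketch of both patterns, assuming this era's ADT/IR headers; `markLive` and `demo` are hypothetical names:

```cpp
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Taking SmallPtrSetImpl<T> & (not SmallPtrSet<T, N> &) lets callers pass
// a set with any inline size N, which is why the pass interfaces below
// switch their parameter types.
static void markLive(Value *V, SmallPtrSetImpl<Value *> &Visited) {
  // insert() now returns std::pair<iterator, bool>; .second is true only
  // when V was newly inserted -- the old bool return value.
  if (!Visited.insert(V).second)
    return; // already visited
  for (User *U : V->users())
    markLive(U, Visited);
}

void demo(Value *Root) {
  SmallPtrSet<Value *, 16> Visited; // any inline size works at call sites
  markLive(Root, Visited);
}
```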
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index f9de54a..c4706e8 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -78,11 +78,15 @@ namespace { const DataLayout *DL; private: + bool isDenselyPacked(Type *type); + bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; CallGraphNode *DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, - SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + SmallPtrSetImpl<Argument*> &ArgsToPromote, + SmallPtrSetImpl<Argument*> &ByValArgsToTransform); + + using llvm::Pass::doInitialization; bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited. unsigned maxElements; @@ -123,6 +127,78 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { return Changed; } +/// \brief Checks if a type could have padding bytes. +bool ArgPromotion::isDenselyPacked(Type *type) { + + // There is no size information, so be conservative. + if (!type->isSized()) + return false; + + // If the alloc size is not equal to the storage size, then there are padding + // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. + if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + return false; + + if (!isa<CompositeType>(type)) + return true; + + // For homogenous sequential types, check for padding within members. + if (SequentialType *seqTy = dyn_cast<SequentialType>(type)) + return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType()); + + // Check for padding within and between elements of a struct. + StructType *StructTy = cast<StructType>(type); + const StructLayout *Layout = DL->getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL->getTypeAllocSizeInBits(ElTy); + } + + return true; +} + +/// \brief Checks if the padding bytes of an argument could be accessed. +bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { + + assert(arg->hasByValAttr()); + + // Track all the pointers to the argument to make sure they are not captured. + SmallPtrSet<Value *, 16> PtrValues; + PtrValues.insert(arg); + + // Track all of the stores. + SmallVector<StoreInst *, 16> Stores; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely. + SmallVector<Value *, 16> WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { + if (PtrValues.insert(V).second) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { + Stores.push_back(Store); + } else if (!isa<LoadInst>(V)) { + return true; + } + } + +// Check to make sure the pointers aren't captured + for (StoreInst *Store : Stores) + if (PtrValues.count(Store->getValueOperand())) + return true; + + return false; +} + /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). 
If safe to promote some arguments, it @@ -154,6 +230,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { isSelfRecursive = true; } + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; @@ -163,9 +246,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe. This does not apply to - // inalloca. - if (PtrArg->hasByValAttr()) { + // pass the elements, which is always safe, if the passed value is densely + // packed or if we can prove the padding bytes are never accessed. This does + // not apply to inalloca. + bool isSafeToPromote = + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" @@ -443,7 +530,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // of elements of the aggregate. return false; } - ToPromote.insert(Operands); + ToPromote.insert(std::move(Operands)); } } @@ -475,10 +562,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // loading block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; - for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > - I = idf_ext_begin(P, TranspBlocks), - E = idf_ext_end(P, TranspBlocks); I != E; ++I) - if (AA.canBasicBlockModify(**I, Loc)) + for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) + if (AA.canBasicBlockModify(*TranspBB, Loc)) return false; } } @@ -493,8 +578,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, - SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { + SmallPtrSetImpl<Argument*> &ArgsToPromote, + SmallPtrSetImpl<Argument*> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. @@ -615,9 +700,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) - DI->second.replaceFunction(NF); - + if (DI != FunctionDIs.end()) { + DISubprogram SP = DI->second; + SP.replaceFunction(NF); + // Ensure the map is updated so it can be reused on subsequent argument + // promotions of the same function. + FunctionDIs.erase(DI); + FunctionDIs[NF] = SP; + } + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -716,9 +807,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // of the previous load. 
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); - // Transfer the TBAA info too. - newLoad->setMetadata(LLVMContext::MD_tbaa, - OrigLoad->getMetadata(LLVMContext::MD_tbaa)); + // Transfer the AA info too. + AAMDNodes AAInfo; + OrigLoad->getAAMetadata(AAInfo); + newLoad->setAAMetadata(AAInfo); + Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 23be081..0b6ade9 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -66,7 +66,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } /// Find values that are marked as llvm.used. static void FindUsedValues(GlobalVariable *LLVMUsed, - SmallPtrSet<const GlobalValue*, 8> &UsedValues) { + SmallPtrSetImpl<const GlobalValue*> &UsedValues) { if (!LLVMUsed) return; ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index ac3853d..4045c09 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -199,10 +199,15 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { return false; // Okay, we know we can transform this function if safe. Scan its body - // looking for calls to llvm.vastart. + // looking for calls marked musttail or calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + CallInst *CI = dyn_cast<CallInst>(I); + if (!CI) + continue; + if (CI->isMustTailCall()) + return false; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { if (II->getIntrinsicID() == Intrinsic::vastart) return false; } @@ -297,8 +302,14 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(&Fn); - if (DI != FunctionDIs.end()) - DI->second.replaceFunction(NF); + if (DI != FunctionDIs.end()) { + DISubprogram SP = DI->second; + SP.replaceFunction(NF); + // Ensure the map is updated so it can be reused on non-varargs argument + // eliminations of the same function. + FunctionDIs.erase(DI); + FunctionDIs[NF] = SP; + } // Fix up any BlockAddresses that refer to the function. Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); @@ -1088,8 +1099,8 @@ bool DAE::runOnModule(Module &M) { // determine that dead arguments passed into recursive functions are dead). // DEBUG(dbgs() << "DAE - Determining liveness\n"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - SurveyFunction(*I); + for (auto &F : M) + SurveyFunction(F); // Now, remove all dead arguments and return values from each function in // turn. @@ -1102,11 +1113,8 @@ bool DAE::runOnModule(Module &M) { // Finally, look for any unused parameters in functions with non-local // linkage and replace the passed in parameters with undef. 
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function& F = *I; - + for (auto &F : M) Changed |= RemoveDeadArgumentsFromCallers(F); - } return Changed; } diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 40ec9fa..2f8c7d9 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -91,7 +91,7 @@ namespace { continue; } - makeVisible(*I, Delete); + makeVisible(*I, Delete); if (Delete) I->setInitializer(nullptr); @@ -106,7 +106,7 @@ namespace { continue; } - makeVisible(*I, Delete); + makeVisible(*I, Delete); if (Delete) I->deleteBody(); @@ -118,8 +118,8 @@ namespace { Module::alias_iterator CurI = I; ++I; - bool Delete = deleteStuff == (bool)Named.count(CurI); - makeVisible(*CurI, Delete); + bool Delete = deleteStuff == (bool)Named.count(CurI); + makeVisible(*CurI, Delete); if (Delete) { Type *Ty = CurI->getType()->getElementType(); @@ -148,7 +148,7 @@ namespace { char GVExtractorPass::ID = 0; } -ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs, +ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs, bool deleteFn) { return new GVExtractorPass(GVs, deleteFn); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 8174df9..823ae53 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -161,8 +161,9 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (!F) - // External node - may write memory. Just give up. + if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) + // External node or node we don't want to optimize - assume it may write + // memory and give up. return false; AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); @@ -204,9 +205,11 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { CI != CE; ++CI) { Value *Arg = *CI; if (Arg->getType()->isPointerTy()) { + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + AliasAnalysis::Location Loc(Arg, - AliasAnalysis::UnknownSize, - I->getMetadata(LLVMContext::MD_tbaa)); + AliasAnalysis::UnknownSize, AAInfo); if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { if (MRB & AliasAnalysis::Mod) // Writes non-local memory. Give up. @@ -443,7 +446,7 @@ determinePointerReadAttrs(Argument *A, case Instruction::AddrSpaceCast: // The original value is not read/written via this if the new value isn't. for (Use &UU : I->uses()) - if (Visited.insert(&UU)) + if (Visited.insert(&UU).second) Worklist.push_back(&UU); break; @@ -457,7 +460,7 @@ determinePointerReadAttrs(Argument *A, auto AddUsersToWorklistIfCapturing = [&] { if (Captures) for (Use &UU : I->uses()) - if (Visited.insert(&UU)) + if (Visited.insert(&UU).second) Worklist.push_back(&UU); }; @@ -525,7 +528,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // looking up whether a given CallGraphNode is in this SCC. 
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F && !F->isDeclaration() && !F->mayBeOverridden()) + if (F && !F->isDeclaration() && !F->mayBeOverridden() && + !F->hasFnAttribute(Attribute::OptimizeNone)) SCCNodes.insert(F); } @@ -539,8 +543,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (!F) - // External node - only a problem for arguments that we pass to it. + if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) + // External node or function we're trying not to optimize - only a problem + // for arguments that we pass to it. continue; // Definitions with weak linkage may be overridden at linktime with @@ -792,8 +797,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (!F) - // External node - skip it; + if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) + // External node or node we don't want to optimize - skip it; return false; // Already noalias. @@ -832,6 +837,9 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { /// given function and set any applicable attributes. Returns true /// if any attributes were set and false otherwise. bool FunctionAttrs::inferPrototypeAttributes(Function &F) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + FunctionType *FTy = F.getFunctionType(); LibFunc::Func TheLibFunc; if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 7e7a4c0..705e929 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" using namespace llvm; @@ -77,9 +78,6 @@ bool GlobalDCE::runOnModule(Module &M) { // Remove empty functions from the global ctors list. Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); - typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy; - ComdatGVPairsTy ComdatGVPairs; - // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -87,8 +85,6 @@ bool GlobalDCE::runOnModule(Module &M) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { if (!I->isDiscardableIfUnused()) GlobalIsNeeded(I); - else if (const Comdat *C = I->getComdat()) - ComdatGVPairs.insert(std::make_pair(C, I)); } } @@ -100,8 +96,6 @@ bool GlobalDCE::runOnModule(Module &M) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { if (!I->isDiscardableIfUnused()) GlobalIsNeeded(I); - else if (const Comdat *C = I->getComdat()) - ComdatGVPairs.insert(std::make_pair(C, I)); } } @@ -111,24 +105,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Externally visible aliases are needed. 
if (!I->isDiscardableIfUnused()) { GlobalIsNeeded(I); - } else if (const Comdat *C = I->getComdat()) { - ComdatGVPairs.insert(std::make_pair(C, I)); - } - } - - for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(), - E = ComdatGVPairs.end(); - I != E;) { - ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first); - bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) { - return Pair.second->isDiscardableIfUnused(); - }); - if (!CanDiscard) { - std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) { - GlobalIsNeeded(Pair.second); - }); } - I = UB; } // Now that all globals which are needed are in the AliveGlobals set, we loop @@ -141,7 +118,12 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) if (!AliveGlobals.count(I)) { DeadGlobalVars.push_back(I); // Keep track of dead globals - I->setInitializer(nullptr); + if (I->hasInitializer()) { + Constant *Init = I->getInitializer(); + I->setInitializer(nullptr); + if (isSafeToDestroyConstant(Init)) + Init->destroyConstant(); + } } // The second pass drops the bodies of functions which are dead... @@ -203,9 +185,22 @@ bool GlobalDCE::runOnModule(Module &M) { /// recursively mark anything that it uses as also needed. void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // If the global is already in the set, no need to reprocess it. - if (!AliveGlobals.insert(G)) + if (!AliveGlobals.insert(G).second) return; - + + Module *M = G->getParent(); + if (Comdat *C = G->getComdat()) { + for (Function &F : *M) + if (F.getComdat() == C) + GlobalIsNeeded(&F); + for (GlobalVariable &GV : M->globals()) + if (GV.getComdat() == C) + GlobalIsNeeded(&GV); + for (GlobalAlias &GA : M->aliases()) + if (GA.getComdat() == C) + GlobalIsNeeded(&GA); + } + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) { // If this is a global variable, we must make sure to add any global values // referenced by the initializer to the alive set. @@ -243,7 +238,7 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { // If we've already processed this constant there's no need to do it again. Constant *Op = dyn_cast<Constant>(*I); - if (Op && SeenConstants.insert(Op)) + if (Op && SeenConstants.insert(Op).second) MarkUsedGlobalsAsNeeded(Op); } } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c1d0d3b..6e0ae83 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -88,6 +88,7 @@ namespace { const DataLayout *DL; TargetLibraryInfo *TLI; + SmallSet<const Comdat *, 8> NotDiscardableComdats; }; } @@ -612,7 +613,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { /// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, - SmallPtrSet<const PHINode*, 8> &PHIs) { + SmallPtrSetImpl<const PHINode*> &PHIs) { for (const User *U : V->users()) if (isa<LoadInst>(U)) { // Will trap. @@ -638,7 +639,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. 
- if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; } else if (isa<ICmpInst>(U) && isa<ConstantPointerNull>(U->getOperand(1))) { @@ -957,7 +958,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// it is to the specified global. static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, - SmallPtrSet<const PHINode*, 8> &PHIs) { + SmallPtrSetImpl<const PHINode*> &PHIs) { for (const User *U : V->users()) { const Instruction *Inst = cast<Instruction>(U); @@ -981,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. - if (PHIs.insert(PN)) + if (PHIs.insert(PN).second) if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) return false; continue; @@ -1047,8 +1048,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { + SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs, + SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. for (const User *U : V->users()) { @@ -1072,11 +1073,11 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, } if (const PHINode *PN = dyn_cast<PHINode>(UI)) { - if (!LoadUsingPHIsPerLoad.insert(PN)) + if (!LoadUsingPHIsPerLoad.insert(PN).second) // This means some phi nodes are dependent on each other. // Avoid infinite looping! return false; - if (!LoadUsingPHIs.insert(PN)) + if (!LoadUsingPHIs.insert(PN).second) // If we have already analyzed this PHI, then it is safe. continue; @@ -1115,9 +1116,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() - , E = LoadUsingPHIs.end(); I != E; ++I) { - const PHINode *PN = *I; + for (const PHINode *PN : LoadUsingPHIs) { for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); @@ -1910,8 +1909,11 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { // Functions without names cannot be referenced outside this module. 
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); + + const Comdat *C = F->getComdat(); + bool inComdat = C && NotDiscardableComdats.count(C); F->removeDeadConstantUsers(); - if (F->isDefTriviallyDead()) { + if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) { F->eraseFromParent(); Changed = true; ++NumFnDeleted; @@ -1943,12 +1945,6 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool GlobalOpt::OptimizeGlobalVars(Module &M) { bool Changed = false; - SmallSet<const Comdat *, 8> NotDiscardableComdats; - for (const GlobalVariable &GV : M.globals()) - if (const Comdat *C = GV.getComdat()) - if (!GV.isDiscardableIfUnused()) - NotDiscardableComdats.insert(C); - for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; @@ -1965,7 +1961,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { if (GV->isDiscardableIfUnused()) { if (const Comdat *C = GV->getComdat()) - if (NotDiscardableComdats.count(C)) + if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage()) continue; Changed |= ProcessGlobal(GV, GVI); } @@ -1975,7 +1971,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSet<Constant*, 8> &SimpleConstants, + SmallPtrSetImpl<Constant*> &SimpleConstants, const DataLayout *DL); @@ -1988,7 +1984,7 @@ isSimpleEnoughValueToCommit(Constant *C, /// in SimpleConstants to avoid having to rescan the same constants all the /// time. static bool isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSet<Constant*, 8> &SimpleConstants, + SmallPtrSetImpl<Constant*> &SimpleConstants, const DataLayout *DL) { // Simple global addresses are supported, do not allow dllimport or // thread-local globals. @@ -2046,10 +2042,11 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSet<Constant*, 8> &SimpleConstants, + SmallPtrSetImpl<Constant*> &SimpleConstants, const DataLayout *DL) { // If we already checked this constant, we win. - if (!SimpleConstants.insert(C)) return true; + if (!SimpleConstants.insert(C).second) + return true; // Check the constant. return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); } @@ -2217,7 +2214,7 @@ public: return MutatedMemory; } - const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const { + const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const { return Invariants; } @@ -2394,6 +2391,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, getVal(SI->getOperand(2))); DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult << "\n"); + } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { + InstResult = ConstantExpr::getExtractValue( + getVal(EVI->getAggregateOperand()), EVI->getIndices()); + DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { + InstResult = ConstantExpr::getInsertValue( + getVal(IVI->getAggregateOperand()), + getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); + DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult + << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector<Constant*, 8> GEPOps; @@ -2663,7 +2671,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, // Okay, we succeeded in evaluating this control flow. 
See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. - if (!ExecutedBlocks.insert(NextBB)) + if (!ExecutedBlocks.insert(NextBB).second) return false; // looped! // Okay, we have never been in this block before. Check to see if there @@ -2700,10 +2708,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); I != E; ++I) CommitValueTo(I->second, I->first); - for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I = - Eval.getInvariants().begin(), E = Eval.getInvariants().end(); - I != E; ++I) - (*I)->setConstant(true); + for (GlobalVariable *GV : Eval.getInvariants()) + GV->setConstant(true); } return EvalSuccess; @@ -2714,7 +2720,7 @@ static int compareNames(Constant *const *A, Constant *const *B) { } static void setUsedInitializer(GlobalVariable &V, - SmallPtrSet<GlobalValue *, 8> Init) { + const SmallPtrSet<GlobalValue *, 8> &Init) { if (Init.empty()) { V.eraseFromParent(); return; @@ -2724,10 +2730,9 @@ static void setUsedInitializer(GlobalVariable &V, PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); SmallVector<llvm::Constant *, 8> UsedArray; - for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end(); - I != E; ++I) { + for (GlobalValue *GV : Init) { Constant *Cast - = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy); + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); UsedArray.push_back(Cast); } // Sort to get deterministic order. @@ -2758,18 +2763,27 @@ public: CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); } typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator; + typedef iterator_range<iterator> used_iterator_range; iterator usedBegin() { return Used.begin(); } iterator usedEnd() { return Used.end(); } + used_iterator_range used() { + return used_iterator_range(usedBegin(), usedEnd()); + } iterator compilerUsedBegin() { return CompilerUsed.begin(); } iterator compilerUsedEnd() { return CompilerUsed.end(); } + used_iterator_range compilerUsed() { + return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); + } bool usedCount(GlobalValue *GV) const { return Used.count(GV); } bool compilerUsedCount(GlobalValue *GV) const { return CompilerUsed.count(GV); } bool usedErase(GlobalValue *GV) { return Used.erase(GV); } bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } - bool usedInsert(GlobalValue *GV) { return Used.insert(GV); } - bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); } + bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } + bool compilerUsedInsert(GlobalValue *GV) { + return CompilerUsed.insert(GV).second; + } void syncVariablesAndSets() { if (UsedV) @@ -2814,7 +2828,8 @@ static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { return U.usedCount(&GA) || U.compilerUsedCount(&GA); } -static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) { +static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, + bool &RenameTarget) { RenameTarget = false; bool Ret = false; if (hasUseOtherThanLLVMUsed(GA, U)) @@ -2849,10 +2864,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool Changed = false; LLVMUsed Used(M); - for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(), - E = Used.usedEnd(); - I != E; ++I) - Used.compilerUsedErase(*I); + for (GlobalValue *GV : Used.used()) + Used.compilerUsedErase(GV); 
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;) { @@ -2963,7 +2976,7 @@ static bool cxxDtorIsEmpty(const Function &Fn, SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); // Don't treat recursive functions as empty. - if (!NewCalledFunctions.insert(CalledFn)) + if (!NewCalledFunctions.insert(CalledFn).second) return false; if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) @@ -3035,6 +3048,20 @@ bool GlobalOpt::runOnModule(Module &M) { while (LocalChange) { LocalChange = false; + NotDiscardableComdats.clear(); + for (const GlobalVariable &GV : M.globals()) + if (const Comdat *C = GV.getComdat()) + if (!GV.isDiscardableIfUnused() || !GV.use_empty()) + NotDiscardableComdats.insert(C); + for (Function &F : M) + if (const Comdat *C = F.getComdat()) + if (!F.isDefTriviallyDead()) + NotDiscardableComdats.insert(C); + for (GlobalAlias &GA : M.aliases()) + if (const Comdat *C = GA.getComdat()) + if (!GA.isDiscardableIfUnused() || !GA.use_empty()) + NotDiscardableComdats.insert(C); + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 624cb90..819b2e0 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -14,6 +14,8 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" @@ -65,6 +67,8 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index d189756..d9a2b9e 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" @@ -73,6 +75,8 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(SimpleInliner, "inline", diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 9087ab2..3abe7a8 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -16,6 +16,8 @@ #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" @@ -74,6 +76,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) /// 
the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<AssumptionTracker>(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -215,7 +219,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If the inlined function already uses this alloca then we can't reuse // it. - if (!UsedAllocas.insert(AvailableAlloca)) + if (!UsedAllocas.insert(AvailableAlloca).second) continue; // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare @@ -357,8 +361,7 @@ bool Inliner::shouldInline(CallSite CS) { // FIXME: All of this logic should be sunk into getInlineCost. It relies on // the internal implementation of the inline cost metrics rather than // treating them as truly abstract units etc. - if (Caller->hasLocalLinkage() || - Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { + if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) { int TotalSecondaryCost = 0; // The candidate cost to be imposed upon the current function. int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); @@ -440,9 +443,11 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + AssumptionTracker *AT = &getAnalysis<AssumptionTracker>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -501,7 +506,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, DL); + InlineFunctionInfo InlineInfo(&CG, DL, AA, AT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -664,6 +669,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { if (!F->isDefTriviallyDead()) continue; + + // It is unsafe to drop a function with discardable linkage from a COMDAT + // without also dropping the other members of the COMDAT. + // The inliner doesn't visit non-function entities which are in COMDAT + // groups so it is unsafe to do so *unless* the linkage is local. + if (!F->hasLocalLinkage() && F->hasComdat()) + continue; // Remove any call graph edges from the function to its callees. CGN->removeAllCalledFunctions(); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index c970a1a..7950163 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -148,9 +148,7 @@ bool InternalizePass::runOnModule(Module &M) { // we don't see references from function local inline assembly. To be // conservative, we internalize symbols in llvm.compiler.used, but we // keep llvm.compiler.used so that the symbol is not deleted by llvm. 
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.begin(), E = Used.end(); - I != E; ++I) { - GlobalValue *V = *I; + for (GlobalValue *V : Used) { ExternalNames.insert(V->getName()); } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 559ef0b..b91ebf2 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -286,7 +286,7 @@ private: /// 6.4.Load: range metadata (as integer numbers) /// On this stage its better to see the code, since its not more than 10-15 /// strings for particular instruction, and could change sometimes. - int cmpOperation(const Instruction *L, const Instruction *R) const; + int cmpOperations(const Instruction *L, const Instruction *R) const; /// Compare two GEPs for equivalent pointer arithmetic. /// Parts to be compared for each comparison stage, @@ -297,9 +297,9 @@ private: /// 3. Pointer operand type (using cmpType method). /// 4. Number of operands. /// 5. Compare operands, using cmpValues method. - int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR); - int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { - return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); + int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR); + int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { + return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); } /// cmpType - compares two types, @@ -342,12 +342,12 @@ private: /// be checked with the same way. If we get Res != 0 on some stage, return it. /// Otherwise return 0. /// 6. For all other cases put llvm_unreachable. - int cmpType(Type *TyL, Type *TyR) const; + int cmpTypes(Type *TyL, Type *TyR) const; int cmpNumbers(uint64_t L, uint64_t R) const; - int cmpAPInt(const APInt &L, const APInt &R) const; - int cmpAPFloat(const APFloat &L, const APFloat &R) const; + int cmpAPInts(const APInt &L, const APInt &R) const; + int cmpAPFloats(const APFloat &L, const APFloat &R) const; int cmpStrings(StringRef L, StringRef R) const; int cmpAttrs(const AttributeSet L, const AttributeSet R) const; @@ -392,15 +392,15 @@ private: DenseMap<const Value*, int> sn_mapL, sn_mapR; }; -class FunctionPtr { +class FunctionNode { AssertingVH<Function> F; const DataLayout *DL; public: - FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {} Function *getFunc() const { return F; } void release() { F = 0; } - bool operator<(const FunctionPtr &RHS) const { + bool operator<(const FunctionNode &RHS) const { return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; } }; @@ -412,7 +412,7 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { return 0; } -int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const { +int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) return Res; if (L.ugt(R)) return 1; @@ -420,11 +420,11 @@ int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const { return 0; } -int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const { +int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), (uint64_t)&R.getSemantics())) return Res; - return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt()); + return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); } int 
FunctionComparator::cmpStrings(StringRef L, StringRef R) const { @@ -474,7 +474,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { // Check whether types are bitcastable. This part is just re-factored // Type::canLosslesslyBitCastTo method, but instead of returning true/false, // we also pack into result which type is "less" for us. - int TypesRes = cmpType(TyL, TyR); + int TypesRes = cmpTypes(TyL, TyR); if (TypesRes != 0) { // Types are different, but check whether we can bitcast them. if (!TyL->isFirstClassType()) { @@ -541,12 +541,12 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { case Value::ConstantIntVal: { const APInt &LInt = cast<ConstantInt>(L)->getValue(); const APInt &RInt = cast<ConstantInt>(R)->getValue(); - return cmpAPInt(LInt, RInt); + return cmpAPInts(LInt, RInt); } case Value::ConstantFPVal: { const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); - return cmpAPFloat(LAPF, RAPF); + return cmpAPFloats(LAPF, RAPF); } case Value::ConstantArrayVal: { const ConstantArray *LA = cast<ConstantArray>(L); @@ -615,7 +615,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { /// cmpType - compares two types, /// defines total ordering among the types set. /// See method declaration comments for more details. -int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { +int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { PointerType *PTyL = dyn_cast<PointerType>(TyL); PointerType *PTyR = dyn_cast<PointerType>(TyR); @@ -665,8 +665,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { return cmpNumbers(STyL->isPacked(), STyR->isPacked()); for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { - if (int Res = cmpType(STyL->getElementType(i), - STyR->getElementType(i))) + if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i))) return Res; } return 0; @@ -681,11 +680,11 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { if (FTyL->isVarArg() != FTyR->isVarArg()) return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); - if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType())) + if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType())) return Res; for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { - if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i))) + if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i))) return Res; } return 0; @@ -696,7 +695,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { ArrayType *ATyR = cast<ArrayType>(TyR); if (ATyL->getNumElements() != ATyR->getNumElements()) return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements()); - return cmpType(ATyL->getElementType(), ATyR->getElementType()); + return cmpTypes(ATyL->getElementType(), ATyR->getElementType()); } } } @@ -705,8 +704,8 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { // and pointer-to-B are equivalent. This should be kept in sync with // Instruction::isSameOperationAs. // Read method declaration comments for more details. -int FunctionComparator::cmpOperation(const Instruction *L, - const Instruction *R) const { +int FunctionComparator::cmpOperations(const Instruction *L, + const Instruction *R) const { // Differences from Instruction::isSameOperationAs: // * replace type comparison with calls to isEquivalentType. 
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top @@ -717,7 +716,7 @@ int FunctionComparator::cmpOperation(const Instruction *L, if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) return Res; - if (int Res = cmpType(L->getType(), R->getType())) + if (int Res = cmpTypes(L->getType(), R->getType())) return Res; if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), @@ -728,7 +727,7 @@ int FunctionComparator::cmpOperation(const Instruction *L, // if all operands are the same type for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { if (int Res = - cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType())) return Res; } @@ -766,13 +765,23 @@ int FunctionComparator::cmpOperation(const Instruction *L, if (int Res = cmpNumbers(CI->getCallingConv(), cast<CallInst>(R)->getCallingConv())) return Res; - return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()); + if (int Res = + cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + return Res; + return cmpNumbers( + (uint64_t)CI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); } if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) { if (int Res = cmpNumbers(CI->getCallingConv(), cast<InvokeInst>(R)->getCallingConv())) return Res; - return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()); + if (int Res = + cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + return cmpNumbers( + (uint64_t)CI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { ArrayRef<unsigned> LIndices = IVI->getIndices(); @@ -835,7 +844,7 @@ int FunctionComparator::cmpOperation(const Instruction *L, // Determine whether two GEP operations perform the same underlying arithmetic. // Read method declaration comments for more details. 
-int FunctionComparator::cmpGEP(const GEPOperator *GEPL, +int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) { unsigned int ASL = GEPL->getPointerAddressSpace(); @@ -851,7 +860,7 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL, APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); if (GEPL->accumulateConstantOffset(*DL, OffsetL) && GEPR->accumulateConstantOffset(*DL, OffsetR)) - return cmpAPInt(OffsetL, OffsetR); + return cmpAPInts(OffsetL, OffsetR); } if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), @@ -935,10 +944,10 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { if (int Res = cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) return Res; - if (int Res = cmpGEP(GEPL, GEPR)) + if (int Res = cmpGEPs(GEPL, GEPR)) return Res; } else { - if (int Res = cmpOperation(InstL, InstR)) + if (int Res = cmpOperations(InstL, InstR)) return Res; assert(InstL->getNumOperands() == InstR->getNumOperands()); @@ -950,7 +959,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) return Res; // TODO: Already checked in cmpOperation - if (int Res = cmpType(OpL->getType(), OpR->getType())) + if (int Res = cmpTypes(OpL->getType(), OpR->getType())) return Res; } } @@ -998,7 +1007,7 @@ int FunctionComparator::compare() { if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) return Res; - if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType())) + if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) return Res; assert(FnL->arg_size() == FnR->arg_size() && @@ -1040,7 +1049,7 @@ int FunctionComparator::compare() { assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(TermL->getSuccessor(i))) + if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) continue; FnLBBs.push_back(TermL->getSuccessor(i)); @@ -1068,7 +1077,7 @@ public: bool runOnModule(Module &M) override; private: - typedef std::set<FunctionPtr> FnTreeType; + typedef std::set<FunctionNode> FnTreeType; /// A work queue of functions that may have been modified and should be /// analyzed again. @@ -1291,11 +1300,11 @@ static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { Value *Result = UndefValue::get(DestTy); for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { Value *Element = createCast( - Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)), + Builder, Builder.CreateExtractValue(V, makeArrayRef(I)), DestTy->getStructElementType(I)); Result = - Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I)); + Builder.CreateInsertValue(Result, Element, makeArrayRef(I)); } return Result; } @@ -1411,14 +1420,14 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // that was already inserted. bool MergeFunctions::insert(Function *NewFunction) { std::pair<FnTreeType::iterator, bool> Result = - FnTree.insert(FunctionPtr(NewFunction, DL)); + FnTree.insert(FunctionNode(NewFunction, DL)); if (Result.second) { DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } - const FunctionPtr &OldF = *Result.first; + const FunctionNode &OldF = *Result.first; // Don't merge tiny functions, since it can just end up making the function // larger. 
@@ -1448,7 +1457,7 @@ bool MergeFunctions::insert(Function *NewFunction) { void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. - FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL)); + FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL)); size_t Erased = 0; if (found != FnTree.end() && found->getFunc() == F) { Erased = 1; diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 46a3187..da85a91 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -17,11 +17,14 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" @@ -45,6 +48,10 @@ UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); +static cl::opt<bool> ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + static cl::opt<bool> UseNewSROA("use-new-sroa", cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); @@ -57,6 +64,20 @@ static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), cl::Hidden, cl::desc("Run the load combining pass")); +static cl::opt<bool> +RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", + cl::init(true), cl::Hidden, + cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " + "vectorizer instead of before")); + +static cl::opt<bool> UseCFLAA("use-cfl-aa", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis")); + +static cl::opt<bool> +EnableMLSM("mlsm", cl::init(true), cl::Hidden, + cl::desc("Enable motion of merged load and store")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -70,6 +91,11 @@ PassManagerBuilder::PassManagerBuilder() { LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; LoadCombine = RunLoadCombine; + DisableGVNLoadPRE = false; + VerifyInput = false; + VerifyOutput = false; + StripDebug = false; + MergeFunctions = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -106,7 +132,10 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. + if (UseCFLAA) + PM.add(createCFLAliasAnalysisPass()); PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createScopedNoAliasAAPass()); PM.add(createBasicAliasAnalysisPass()); } @@ -130,18 +159,22 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) { } void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { - // If all optimizations are disabled, just run the always-inline pass. 
+ // If all optimizations are disabled, just run the always-inline pass and, + // if enabled, the function merging pass. if (OptLevel == 0) { if (Inliner) { MPM.add(Inliner); Inliner = nullptr; } - // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC - // pass manager, but we don't want to add extensions into that pass manager. - // To prevent this we must insert a no-op module pass to reset the pass - // manager to get the same behavior as EP_OptimizerLast in non-O0 builds. - if (!GlobalExtensions->empty() || !Extensions.empty()) + // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly + // creates a CGSCC pass manager, but we don't want to add extensions into + // that pass manager. To prevent this we insert a no-op module pass to reset + // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 + // builds. The function merging pass is + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + else if (!GlobalExtensions->empty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); @@ -207,8 +240,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); - if (OptLevel > 1) - MPM.add(createGVNPass()); // Remove redundancies + if (OptLevel > 1) { + if (EnableMLSM) + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset MPM.add(createSCCPPass()); // Constant prop with SCCP @@ -224,21 +260,23 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (RerollLoops) MPM.add(createLoopRerollPass()); - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass()); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); + if (!RunSLPAfterLoopVectorization) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } } if (LoadCombine) @@ -253,6 +291,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. 
+ if (ExtraVectorizerPasses)
+ MPM.add(createLoopRotatePass());
+
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
// on -O1 and no #pragma is found). Would be good to have these two passes
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ MPM.add(createEarlyCSEPass());
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createLICMPass());
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+ }
+
+ if (RunSLPAfterLoopVectorization) {
+ if (SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
+ }
+ }
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+ }
+
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
+ // After vectorization and unrolling, assume intrinsics may tell us more
+ // about pointer alignments.
+ MPM.add(createAlignmentFromAssumptionsPass());
+
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -277,22 +366,17 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createConstantMergePass()); // Merge dup global constants
}
}
+
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- bool Internalize,
- bool RunInliner,
- bool DisableGVNLoadPRE) {
+void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
- // Now that composite has been compiled, scan through the module, looking
- // for a main function. If main is defined, mark all other functions
- // internal.
- if (Internalize)
- PM.add(createInternalizePass("main"));
-
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
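
For context, here is a minimal sketch (not part of this patch) of how a frontend might drive the rewritten populateModulePassManager; the buildO2Pipeline helper is hypothetical, while MergeFunctions and DisableGVNLoadPRE are the builder members this change adds:

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Hypothetical -O2 driver exercising the new builder knobs.
static void buildO2Pipeline(llvm::PassManager &MPM) {
  llvm::PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  Builder.SizeLevel = 0;
  Builder.Inliner = llvm::createFunctionInliningPass(2, 0);
  Builder.LoopVectorize = true;      // gates createLoopVectorizePass
  Builder.SLPVectorize = true;       // now runs after the loop vectorizer by default
  Builder.MergeFunctions = true;     // new member; honored even at -O0
  Builder.DisableGVNLoadPRE = false; // new member threaded into createGVNPass
  Builder.populateModulePassManager(MPM);
}
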
@@ -316,8 +400,11 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
- if (RunInliner)
- PM.add(createFunctionInliningPass());
+ bool RunInliner = Inliner;
+ if (RunInliner) {
+ PM.add(Inliner);
+ Inliner = nullptr;
+ }
PM.add(createPruneEHPass()); // Remove dead EH info.
@@ -346,6 +433,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalsModRefPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
+ if (EnableMLSM)
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -355,10 +444,16 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
- PM.add(createLoopVectorizePass(true, true));
+ PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (RunSLPAfterLoopVectorization)
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ // After vectorization, assume intrinsics may tell us more about pointer
+ // alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
if (LoadCombine)
PM.add(createLoadCombinePass());
@@ -374,6 +469,39 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
+
+ // FIXME: this is profitable (for compile time) to do at -O0 too, but
+ // currently it damages debug info.
+ if (MergeFunctions)
+ PM.add(createMergeFunctionsPass());
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ TargetMachine *TM) {
+ if (TM) {
+ PM.add(new DataLayoutPass());
+ TM->addAnalysisPasses(PM);
+ }
+
+ if (LibraryInfo)
+ PM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (VerifyInput)
+ PM.add(createVerifierPass());
+
+ if (StripDebug)
+ PM.add(createStripSymbolsPass(true));
+
+ if (VerifyInput)
+ PM.add(createDebugInfoVerifierPass());
+
+ if (OptLevel != 0)
+ addLTOOptimizationPasses(PM);
+
+ if (VerifyOutput) {
+ PM.add(createVerifierPass());
+ PM.add(createDebugInfoVerifierPass());
+ }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -457,5 +585,11 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
+
+ // A small backwards-compatibility hack. populateLTOPassManager used to take
+ // a RunInliner option.
+ if (RunInliner && !Builder->Inliner)
+ Builder->Inliner = createFunctionInliningPass();
+
+ Builder->populateLTOPassManager(*LPM);
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 1abbccc..3412b9e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -154,9 +154,8 @@ static void RemoveDeadConstant(Constant *C) {
C->destroyConstant();
// If the constant referenced anything, see if we can delete it as well.
- for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
- OE = Operands.end(); OI != OE; ++OI)
- RemoveDeadConstant(*OI);
+ for (Constant *O : Operands)
+ RemoveDeadConstant(O);
}
// Strip the symbol table of its names.
@@ -191,7 +190,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
/// Find values that are marked as llvm.used.
static void findUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
UsedValues.insert(LLVMUsed);
@@ -350,28 +349,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- // Make sure that 9 is still the index of the subprograms. This is to make
- // sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this is updated if such an
- // event occurs.
- assert(DIC->getNumOperands() >= 10 &&
- SPs == DIC->getOperand(9) &&
- "DICompileUnits is expected to store Subprograms in operand "
- "9.");
- DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
Changed = true;
}
if (GlobalVariableChange) {
- // Make sure that 10 is still the index of global variables. This is to
- // make sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this index is updated if such
- // an event occurs.
- assert(DIC->getNumOperands() >= 11 &&
- GVs == DIC->getOperand(10) &&
- "DICompileUnits is expected to store Global Variables in operand "
- "10.");
- DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
Changed = true;
}
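
To illustrate the reworked LTO entry point above, a minimal hypothetical sketch (not from this patch): inlining is now requested by setting Builder.Inliner instead of the old RunInliner flag, and the builder no longer internalizes, so callers wanting the old Internalize behavior add createInternalizePass themselves.

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Hypothetical LTO driver for the new two-argument populateLTOPassManager.
static void buildLTOPipeline(llvm::PassManager &PM, llvm::TargetMachine *TM) {
  llvm::PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  Builder.Inliner = llvm::createFunctionInliningPass(); // replaces RunInliner
  Builder.VerifyInput = true;  // run the IR and debug-info verifiers up front
  Builder.VerifyOutput = true; // and again after the optimization passes
  Builder.MergeFunctions = true;
  // Internalization is the caller's job now, e.g.:
  // PM.add(llvm::createInternalizePass("main"));
  Builder.populateLTOPassManager(PM, TM); // TM contributes DataLayout + analyses
}
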