diff options
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/IPO/InlineAlways.cpp | 95 | ||||
-rw-r--r-- | lib/Transforms/IPO/InlineSimple.cpp | 14 | ||||
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 108 | ||||
-rw-r--r-- | lib/Transforms/IPO/Internalize.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/IPO/PassManagerBuilder.cpp | 21 |
6 files changed, 125 insertions, 128 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a32e550..1522aa4 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -341,6 +341,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + + // If the initializer is an all-null value and we have an inbounds GEP, + // we already know what the result of any load from that GEP is. + // TODO: Handle splats. + if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) + SubInit = Constant::getNullValue(GEP->getType()->getElementType()); } Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 3c7fac6..664ddf6 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -32,7 +32,6 @@ namespace { // AlwaysInliner only inlines functions that are mark as "always inline". class AlwaysInliner : public Inliner { - InlineCostAnalyzer CA; public: // Use extremely low threshold. AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) { @@ -43,40 +42,11 @@ namespace { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); - // We assume indirect calls aren't calling an always-inline function. - if (!Callee) return InlineCost::getNever(); - - // We can't inline calls to external functions. - // FIXME: We shouldn't even get here. - if (Callee->isDeclaration()) return InlineCost::getNever(); - - // Return never for anything not marked as always inline. - if (!Callee->hasFnAttr(Attribute::AlwaysInline)) - return InlineCost::getNever(); - - // We still have to check the inline cost in case there are reasons to - // not inline which trump the always-inline attribute such as setjmp and - // indirectbr. - return CA.getInlineCost(CS); - } - float getInlineFudgeFactor(CallSite CS) { - return CA.getInlineFudgeFactor(CS); - } - void resetCachedCostInfo(Function *Caller) { - CA.resetCachedCostInfo(Caller); - } - void growCachedCostInfo(Function* Caller, Function* Callee) { - CA.growCachedCostInfo(Caller, Callee); - } + virtual InlineCost getInlineCost(CallSite CS); virtual bool doFinalization(CallGraph &CG) { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true); } virtual bool doInitialization(CallGraph &CG); - void releaseMemory() { - CA.clear(); - } }; } @@ -93,9 +63,70 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { return new AlwaysInliner(InsertLifetime); } +/// \brief Minimal filter to detect invalid constructs for inlining. +static bool isInlineViable(Function &F) { + bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. + if (isa<IndirectBrInst>(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. + if (!ReturnsTwice && CS.isCall() && + cast<CallInst>(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} + +/// \brief Get the inline cost for the always-inliner. +/// +/// The always inliner *only* handles functions which are marked with the +/// attribute to force inlining. As such, it is dramatically simpler and avoids +/// using the powerful (but expensive) inline cost analysis. Instead it uses +/// a very simple and boring direct walk of the instructions looking for +/// impossible-to-inline constructs. +/// +/// Note, it would be possible to go to some lengths to cache the information +/// computed here, but as we only expect to do this for relatively few and +/// small functions which have the explicit attribute to force inlining, it is +/// likely not worth it in practice. +InlineCost AlwaysInliner::getInlineCost(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + // We assume indirect calls aren't calling an always-inline function. + if (!Callee) return InlineCost::getNever(); + + // We can't inline calls to external functions. + // FIXME: We shouldn't even get here. + if (Callee->isDeclaration()) return InlineCost::getNever(); + + // Return never for anything not marked as always inline. + if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + return InlineCost::getNever(); + + // Do some minimal analysis to preclude non-viable functions. + if (!isInlineViable(*Callee)) + return InlineCost::getNever(); + + // Otherwise, force inlining. + return InlineCost::getAlways(); +} + // doInitialization - Initializes the vector of functions that have not // been annotated with the "always inline" attribute. bool AlwaysInliner::doInitialization(CallGraph &CG) { - CA.setTargetData(getAnalysisIfAvailable<TargetData>()); return false; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 03032e6..50038d8 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -40,21 +40,9 @@ namespace { } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { - return CA.getInlineCost(CS); - } - float getInlineFudgeFactor(CallSite CS) { - return CA.getInlineFudgeFactor(CS); - } - void resetCachedCostInfo(Function *Caller) { - CA.resetCachedCostInfo(Caller); - } - void growCachedCostInfo(Function* Caller, Function* Callee) { - CA.growCachedCostInfo(Caller, Callee); + return CA.getInlineCost(CS, getInlineThreshold(CS)); } virtual bool doInitialization(CallGraph &CG); - void releaseMemory() { - CA.clear(); - } }; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 9975333..dc9cbfb 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -19,7 +19,6 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -37,6 +36,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); +// This weirdly named statistic tracks the number of times that, when attemting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps. +STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + static cl::opt<int> InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -232,14 +236,10 @@ bool Inliner::shouldInline(CallSite CS) { return false; } - int Cost = IC.getValue(); Function *Caller = CS.getCaller(); - int CurrentThreshold = getInlineThreshold(CS); - float FudgeFactor = getInlineFudgeFactor(CS); - int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); - if (Cost >= AdjThreshold) { - DEBUG(dbgs() << " NOT Inlining: cost=" << Cost - << ", thres=" << AdjThreshold + if (!IC) { + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -256,12 +256,17 @@ bool Inliner::shouldInline(CallSite CS) { // are used. Thus we will always have the opportunity to make local inlining // decisions. Importantly the linkonce-ODR linkage covers inline functions // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. if (Caller->hasLocalLinkage() || Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { int TotalSecondaryCost = 0; - bool outerCallsFound = false; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); // This bool tracks what happens if we do NOT inline C into B. - bool callerWillBeRemoved = true; + bool callerWillBeRemoved = Caller->hasLocalLinkage(); // This bool tracks what happens if we DO inline C into B. bool inliningPreventsSomeOuterInline = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); @@ -277,26 +282,20 @@ bool Inliner::shouldInline(CallSite CS) { } InlineCost IC2 = getInlineCost(CS2); - if (IC2.isNever()) + ++NumCallerCallersAnalyzed; + if (!IC2) { callerWillBeRemoved = false; - if (IC2.isAlways() || IC2.isNever()) + continue; + } + if (IC2.isAlways()) continue; - outerCallsFound = true; - int Cost2 = IC2.getValue(); - int CurrentThreshold2 = getInlineThreshold(CS2); - float FudgeFactor2 = getInlineFudgeFactor(CS2); - - if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) - callerWillBeRemoved = false; - - // See if we have this case. We subtract off the penalty - // for the call instruction, which we would be deleting. - if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && - Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= - (int)(CurrentThreshold2 * FudgeFactor2)) { + // See if inlining or original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { inliningPreventsSomeOuterInline = true; - TotalSecondaryCost += Cost2; + TotalSecondaryCost += IC2.getCost(); } } // If all outer calls to Caller would get inlined, the cost for the last @@ -306,17 +305,16 @@ bool Inliner::shouldInline(CallSite CS) { if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - if (outerCallsFound && inliningPreventsSomeOuterInline && - TotalSecondaryCost < Cost) { - DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << - " Cost = " << Cost << + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); return false; } } - DEBUG(dbgs() << " Inlining: cost=" << Cost - << ", thres=" << AdjThreshold + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); return true; } @@ -335,38 +333,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } -/// \brief Simplify arguments going into a particular callsite. -/// -/// This is important to do each time we add a callsite due to inlining so that -/// constants and other entities which feed into inline cost estimation are -/// properly recognized when analyzing the new callsite. Consider: -/// void outer(int x) { -/// if (x < 42) -/// return inner(42 - x); -/// ... -/// } -/// void inner(int x) { -/// ... -/// } -/// -/// The inliner gives calls to 'outer' with a constant argument a bonus because -/// it will delete one side of a branch. But the resulting call to 'inner' -/// will, after inlining, also have a constant operand. We need to do just -/// enough constant folding to expose this for callsite arguments. The rest -/// will be taken care of after the inliner finishes running. -static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) { - // FIXME: It would be nice to avoid this smallvector if RAUW doesn't - // invalidate operand iterators in any cases. - SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs; - for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (Instruction *Inst = dyn_cast<Instruction>(*I)) - if (Value *SimpleArg = SimplifyInstruction(Inst, TD)) - SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg)); - for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx) - SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second); -} - bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); @@ -455,8 +421,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CG[Caller]->removeCallEdgeFor(CS); CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; - // Update the cached cost info with the missing call - growCachedCostInfo(Caller, NULL); } else { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; @@ -494,14 +458,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); i != e; ++i) { Value *Ptr = InlineInfo.InlinedCalls[i]; - CallSite NewCS = Ptr; - simplifyCallSiteArguments(TD, NewCS); - CallSites.push_back(std::make_pair(NewCS, NewHistoryID)); + CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); } } - - // Update the cached cost info with the inlined call. - growCachedCostInfo(Caller, Callee); } // If we inlined or deleted the last possible call site to the function, @@ -521,8 +480,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Remove any call graph edges from the callee to its callees. CalleeNode->removeAllCalledFunctions(); - resetCachedCostInfo(Callee); - // Removing the node for callee from the call graph and delete it. delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; @@ -601,14 +558,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. - std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); + array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(), E = FunctionsToRemove.end(); I != E; ++I) { - resetCachedCostInfo((*I)->getFunction()); delete CG.removeFunctionFromModule(*I); ++NumDeleted; } diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 7cb1d18..fb5869e 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -122,6 +122,11 @@ bool InternalizePass::runOnModule(Module &M) { bool Changed = false; + // Never internalize functions which code-gen might insert. + // FIXME: We should probably add this (and the __stack_chk_guard) via some + // type of call-back in CodeGen. + ExternalNames.insert("__stack_chk_fail"); + // Mark all functions not in the api as internal. // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) @@ -148,9 +153,11 @@ bool InternalizePass::runOnModule(Module &M) { // won't find them. (see MachineModuleInfo.) ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); - ExternalNames.insert("llvm.noinline"); ExternalNames.insert("llvm.global.annotations"); + // Never internalize symbols code-gen inserts. + ExternalNames.insert("__stack_chk_guard"); + // Mark all global variables with initializers that are not in the api as // internal as well. // FIXME: maybe use private linkage? diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 8408437..43b4ab5 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -35,6 +35,11 @@ using namespace llvm; static cl::opt<bool> RunVectorization("vectorize", cl::desc("Run vectorization passes")); +static cl::opt<bool> +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -182,8 +187,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (Vectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1) - MPM.add(createGVNPass()); // Remove redundancies + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass()); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies } MPM.add(createAggressiveDCEPass()); // Delete dead instructions @@ -202,11 +209,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants } + addExtensionsToPM(EP_OptimizerLast, MPM); } void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, bool Internalize, - bool RunInliner) { + bool RunInliner, + bool DisableGVNLoadPRE) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); @@ -262,9 +271,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsModRefPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. - PM.add(createGVNPass()); // Remove redundancies. - PM.add(createMemCpyOptPass()); // Remove dead memcpys. + PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); |