aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/IPO
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp6
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp95
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp14
-rw-r--r--lib/Transforms/IPO/Inliner.cpp108
-rw-r--r--lib/Transforms/IPO/Internalize.cpp9
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp21
6 files changed, 125 insertions, 128 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a32e550..1522aa4 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -341,6 +341,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+ // If the initializer is an all-null value and we have an inbounds GEP,
+ // we already know what the result of any load from that GEP is.
+ // TODO: Handle splats.
+ if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+ SubInit = Constant::getNullValue(GEP->getType()->getElementType());
}
Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 3c7fac6..664ddf6 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,7 +32,6 @@ namespace {
// AlwaysInliner only inlines functions that are mark as "always inline".
class AlwaysInliner : public Inliner {
- InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -43,40 +42,11 @@ namespace {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
- // We assume indirect calls aren't calling an always-inline function.
- if (!Callee) return InlineCost::getNever();
-
- // We can't inline calls to external functions.
- // FIXME: We shouldn't even get here.
- if (Callee->isDeclaration()) return InlineCost::getNever();
-
- // Return never for anything not marked as always inline.
- if (!Callee->hasFnAttr(Attribute::AlwaysInline))
- return InlineCost::getNever();
-
- // We still have to check the inline cost in case there are reasons to
- // not inline which trump the always-inline attribute such as setjmp and
- // indirectbr.
- return CA.getInlineCost(CS);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
- }
+ virtual InlineCost getInlineCost(CallSite CS);
virtual bool doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
@@ -93,9 +63,70 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
return new AlwaysInliner(InsertLifetime);
}
+/// \brief Minimal filter to detect invalid constructs for inlining.
+static bool isInlineViable(Function &F) {
+ bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
+
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
+
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+ // We assume indirect calls aren't calling an always-inline function.
+ if (!Callee) return InlineCost::getNever();
+
+ // We can't inline calls to external functions.
+ // FIXME: We shouldn't even get here.
+ if (Callee->isDeclaration()) return InlineCost::getNever();
+
+ // Return never for anything not marked as always inline.
+ if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever();
+
+ // Do some minimal analysis to preclude non-viable functions.
+ if (!isInlineViable(*Callee))
+ return InlineCost::getNever();
+
+ // Otherwise, force inlining.
+ return InlineCost::getAlways();
+}
+
// doInitialization - Initializes the vector of functions that have not
// been annotated with the "always inline" attribute.
bool AlwaysInliner::doInitialization(CallGraph &CG) {
- CA.setTargetData(getAnalysisIfAvailable<TargetData>());
return false;
}
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 03032e6..50038d8 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -40,21 +40,9 @@ namespace {
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
+ return CA.getInlineCost(CS, getInlineThreshold(CS));
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9975333..dc9cbfb 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,7 +19,6 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -37,6 +36,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+// This weirdly named statistic tracks the number of times that, when attemting
+// to inline a function A into B, we analyze the callers of B in order to see
+// if those would be more profitable and blocked inline steps.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -232,14 +236,10 @@ bool Inliner::shouldInline(CallSite CS) {
return false;
}
- int Cost = IC.getValue();
Function *Caller = CS.getCaller();
- int CurrentThreshold = getInlineThreshold(CS);
- float FudgeFactor = getInlineFudgeFactor(CS);
- int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
- if (Cost >= AdjThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
return false;
}
@@ -256,12 +256,17 @@ bool Inliner::shouldInline(CallSite CS) {
// are used. Thus we will always have the opportunity to make local inlining
// decisions. Importantly the linkonce-ODR linkage covers inline functions
// and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
if (Caller->hasLocalLinkage() ||
Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
int TotalSecondaryCost = 0;
- bool outerCallsFound = false;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
// This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = true;
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
@@ -277,26 +282,20 @@ bool Inliner::shouldInline(CallSite CS) {
}
InlineCost IC2 = getInlineCost(CS2);
- if (IC2.isNever())
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
callerWillBeRemoved = false;
- if (IC2.isAlways() || IC2.isNever())
+ continue;
+ }
+ if (IC2.isAlways())
continue;
- outerCallsFound = true;
- int Cost2 = IC2.getValue();
- int CurrentThreshold2 = getInlineThreshold(CS2);
- float FudgeFactor2 = getInlineFudgeFactor(CS2);
-
- if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- callerWillBeRemoved = false;
-
- // See if we have this case. We subtract off the penalty
- // for the call instruction, which we would be deleting.
- if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
- Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
- (int)(CurrentThreshold2 * FudgeFactor2)) {
+ // See if inlining or original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += Cost2;
+ TotalSecondaryCost += IC2.getCost();
}
}
// If all outer calls to Caller would get inlined, the cost for the last
@@ -306,17 +305,16 @@ bool Inliner::shouldInline(CallSite CS) {
if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && inliningPreventsSomeOuterInline &&
- TotalSecondaryCost < Cost) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << Cost <<
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
return false;
}
}
- DEBUG(dbgs() << " Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
return true;
}
@@ -335,38 +333,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
-/// \brief Simplify arguments going into a particular callsite.
-///
-/// This is important to do each time we add a callsite due to inlining so that
-/// constants and other entities which feed into inline cost estimation are
-/// properly recognized when analyzing the new callsite. Consider:
-/// void outer(int x) {
-/// if (x < 42)
-/// return inner(42 - x);
-/// ...
-/// }
-/// void inner(int x) {
-/// ...
-/// }
-///
-/// The inliner gives calls to 'outer' with a constant argument a bonus because
-/// it will delete one side of a branch. But the resulting call to 'inner'
-/// will, after inlining, also have a constant operand. We need to do just
-/// enough constant folding to expose this for callsite arguments. The rest
-/// will be taken care of after the inliner finishes running.
-static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
- // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
- // invalidate operand iterators in any cases.
- SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (Instruction *Inst = dyn_cast<Instruction>(*I))
- if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
- SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
- for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
- SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
-}
-
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -455,8 +421,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
- // Update the cached cost info with the missing call
- growCachedCostInfo(Caller, NULL);
} else {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
@@ -494,14 +458,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
i != e; ++i) {
Value *Ptr = InlineInfo.InlinedCalls[i];
- CallSite NewCS = Ptr;
- simplifyCallSiteArguments(TD, NewCS);
- CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
}
}
-
- // Update the cached cost info with the inlined call.
- growCachedCostInfo(Caller, Callee);
}
// If we inlined or deleted the last possible call site to the function,
@@ -521,8 +480,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Remove any call graph edges from the callee to its callees.
CalleeNode->removeAllCalledFunctions();
- resetCachedCostInfo(Callee);
-
// Removing the node for callee from the call graph and delete it.
delete CG.removeFunctionFromModule(CalleeNode);
++NumDeleted;
@@ -601,14 +558,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Note that it doesn't matter that we are iterating over a non-stable order
// here to do this, it doesn't matter which order the functions are deleted
// in.
- std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
FunctionsToRemove.end()),
FunctionsToRemove.end());
for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
E = FunctionsToRemove.end();
I != E; ++I) {
- resetCachedCostInfo((*I)->getFunction());
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
}
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 7cb1d18..fb5869e 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -122,6 +122,11 @@ bool InternalizePass::runOnModule(Module &M) {
bool Changed = false;
+ // Never internalize functions which code-gen might insert.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
+
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -148,9 +153,11 @@ bool InternalizePass::runOnModule(Module &M) {
// won't find them. (see MachineModuleInfo.)
ExternalNames.insert("llvm.global_ctors");
ExternalNames.insert("llvm.global_dtors");
- ExternalNames.insert("llvm.noinline");
ExternalNames.insert("llvm.global.annotations");
+ // Never internalize symbols code-gen inserts.
+ ExternalNames.insert("__stack_chk_guard");
+
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8408437..43b4ab5 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -35,6 +35,11 @@ using namespace llvm;
static cl::opt<bool>
RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+static cl::opt<bool>
+UseGVNAfterVectorization("use-gvn-after-vectorization",
+ cl::init(false), cl::Hidden,
+ cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -182,8 +187,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (Vectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1)
- MPM.add(createGVNPass()); // Remove redundancies
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass()); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
@@ -202,11 +209,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
}
+ addExtensionsToPM(EP_OptimizerLast, MPM);
}
void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
bool Internalize,
- bool RunInliner) {
+ bool RunInliner,
+ bool DisableGVNLoadPRE) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -262,9 +271,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsModRefPass()); // IP alias analysis.
- PM.add(createLICMPass()); // Hoist loop invariants.
- PM.add(createGVNPass()); // Remove redundancies.
- PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());