diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 06:37:48 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 06:37:48 +0000 |
commit | 9880e57fedff6ca8174c2cdc5f130c1669a5c09b (patch) | |
tree | 4ad44203af29e98cb7c89ddd60b731e582e9fd4a | |
parent | f4b941dedab7ce680107ef6e0596f6afa04bdfd4 (diff) | |
download | external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.zip external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.tar.gz external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.tar.bz2 |
Increasing the inline limit from (overly conservative) 200 to 300. Given each BB costs 20 and each instruction costs 5, 200 means a 4 BB function + 24 instructions (actually less because caller's size also contributes to it).
Furthermore, double the limit when more than 10% of the callee instructions are vector instructions. Multimedia kernels tend to love inlining.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48725 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Transforms/IPO/InlinerPass.h | 5 | ||||
-rw-r--r-- | include/llvm/Transforms/Utils/InlineCost.h | 13 | ||||
-rw-r--r-- | lib/Transforms/IPO/InlineSimple.cpp | 3 | ||||
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 10 | ||||
-rw-r--r-- | lib/Transforms/Utils/InlineCost.cpp | 34 |
5 files changed, 54 insertions, 11 deletions
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index 26cf4e4..d34641d 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass { /// virtual int getInlineCost(CallSite CS) = 0; + // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a + // higher threshold to determine if the function call should be inlined. + /// + virtual float getInlineFudgeFactor(CallSite CS) = 0; + private: // InlineThreshold - Cache the value here for easy access. unsigned InlineThreshold; diff --git a/include/llvm/Transforms/Utils/InlineCost.h b/include/llvm/Transforms/Utils/InlineCost.h index 373dc7b..6018a80 100644 --- a/include/llvm/Transforms/Utils/InlineCost.h +++ b/include/llvm/Transforms/Utils/InlineCost.h @@ -41,6 +41,10 @@ namespace llvm { // NumInsts, NumBlocks - Keep track of how large each function is, which is // used to estimate the code size cost of inlining it. unsigned NumInsts, NumBlocks; + + // NumVectorInsts - Keep track how many instrctions produce vector values. + // The inliner is being more aggressive with inlining vector kernels. + unsigned NumVectorInsts; // ArgumentWeights - Each formal argument of the function is inspected to // see if it is used in any contexts where making it a constant or alloca @@ -48,7 +52,7 @@ namespace llvm { // entry here. std::vector<ArgInfo> ArgumentWeights; - FunctionInfo() : NumInsts(0), NumBlocks(0) {} + FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {} /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. @@ -73,7 +77,12 @@ namespace llvm { // getInlineCost - The heuristic used to determine if we should inline the // function call or not. // - int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline); + int getInlineCost(CallSite CS, + SmallPtrSet<const Function *, 16> &NeverInline); + + // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a + // higher threshold to determine if the function call should be inlined. + float getInlineFudgeFactor(CallSite CS); }; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index fe0ea5a..a007103 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -40,6 +40,9 @@ namespace { int getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); } + float getInlineFudgeFactor(CallSite CS) { + return CA.getInlineFudgeFactor(CS); + } virtual bool doInitialization(CallGraph &CG); }; char SimpleInliner::ID = 0; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index acd3604..f33f368 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); namespace { - cl::opt<int> // FIXME: 200 is VERY conservative - InlineLimit("inline-threshold", cl::Hidden, cl::init(200), - cl::desc("Control the amount of inlining to perform (default = 200)")); + cl::opt<int> + InlineLimit("inline-threshold", cl::Hidden, cl::init(400), + cl::desc("Control the amount of inlining to perform (default = 400)")); } Inliner::Inliner(const void *ID) @@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { // try to do so. CallSite CS = CallSites[CSi]; int InlineCost = getInlineCost(CS); - if (InlineCost >= (int)InlineThreshold) { + float FudgeFactor = getInlineFudgeFactor(CS); + + if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) { DOUT << " NOT Inlining: cost=" << InlineCost << ", Call: " << *CS.getInstruction(); } else { diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp index 1e1d1e4..4349d0e 100644 --- a/lib/Transforms/Utils/InlineCost.cpp +++ b/lib/Transforms/Utils/InlineCost.cpp @@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo:: /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { - unsigned NumInsts = 0, NumBlocks = 0; + unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0; // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. @@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count. + if (isa<PHINode>(II)) continue; // PHI nodes don't count. + + if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) || + isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType())) + ++NumVectorInsts; // Noop casts, including ptr <-> int, don't count. if (const CastInst *CI = dyn_cast<CastInst>(II)) { @@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { isa<PtrToIntInst>(CI)) continue; } else if (const GetElementPtrInst *GEPI = - dyn_cast<GetElementPtrInst>(II)) { + dyn_cast<GetElementPtrInst>(II)) { // If a GEP has all constant indices, it will probably be folded with // a load/store. bool AllConstant = true; @@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { ++NumBlocks; } - this->NumBlocks = NumBlocks; - this->NumInsts = NumInsts; + this->NumBlocks = NumBlocks; + this->NumInsts = NumInsts; + this->NumVectorInsts = NumVectorInsts; // Check out all of the arguments to the function, figuring out how much // code can be eliminated if one of the arguments is a constant. @@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS, // InlineCost += Caller->size()/20; - // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20; + return InlineCost; } +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // Be more aggressive if the function contains a good chunk (if it mades up + // at least 10% of the instructions) of vector instructions. + if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10) + return 1.5f; + return 1.0f; +} |