diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 06:37:48 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 06:37:48 +0000 |
commit | 9880e57fedff6ca8174c2cdc5f130c1669a5c09b (patch) | |
tree | 4ad44203af29e98cb7c89ddd60b731e582e9fd4a /lib/Transforms/Utils | |
parent | f4b941dedab7ce680107ef6e0596f6afa04bdfd4 (diff) | |
download | external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.zip external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.tar.gz external_llvm-9880e57fedff6ca8174c2cdc5f130c1669a5c09b.tar.bz2 |
Increase the inline limit from the (overly conservative) 200 to 300. Given that each basic block costs 20 and each instruction costs 5, a limit of 200 allows only a function with 4 basic blocks and 24 instructions (actually less, because the caller's size also contributes to the cost).
Furthermore, double the limit when more than 10% of the callee's instructions are vector instructions. Multimedia kernels tend to benefit greatly from inlining.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48725 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/InlineCost.cpp | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp index 1e1d1e4..4349d0e 100644 --- a/lib/Transforms/Utils/InlineCost.cpp +++ b/lib/Transforms/Utils/InlineCost.cpp @@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo:: /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { - unsigned NumInsts = 0, NumBlocks = 0; + unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0; // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. @@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count. + if (isa<PHINode>(II)) continue; // PHI nodes don't count. + + if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) || + isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType())) + ++NumVectorInsts; // Noop casts, including ptr <-> int, don't count. if (const CastInst *CI = dyn_cast<CastInst>(II)) { @@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { isa<PtrToIntInst>(CI)) continue; } else if (const GetElementPtrInst *GEPI = - dyn_cast<GetElementPtrInst>(II)) { + dyn_cast<GetElementPtrInst>(II)) { // If a GEP has all constant indices, it will probably be folded with // a load/store. bool AllConstant = true; @@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { ++NumBlocks; } - this->NumBlocks = NumBlocks; - this->NumInsts = NumInsts; + this->NumBlocks = NumBlocks; + this->NumInsts = NumInsts; + this->NumVectorInsts = NumVectorInsts; // Check out all of the arguments to the function, figuring out how much // code can be eliminated if one of the arguments is a constant. 
@@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS, // InlineCost += Caller->size()/20; - // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20; + return InlineCost; } +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // Be more aggressive if the function contains a good chunk (if it mades up + // at least 10% of the instructions) of vector instructions. + if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10) + return 1.5f; + return 1.0f; +} |