aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Analysis/InlineCost.h12
-rw-r--r--lib/Analysis/InlineCost.cpp70
-rw-r--r--lib/Transforms/IPO/PartialSpecialization.cpp90
-rw-r--r--test/Transforms/PartialSpecialize/heuristics.ll49
4 files changed, 181 insertions, 40 deletions
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 9963ba4..ccec4c5 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -143,6 +143,18 @@ namespace llvm {
Function *Callee,
SmallPtrSet<const Function *, 16> &NeverInline);
+ /// getSpecializationBonus - The heuristic used to determine the per-call
+ /// performance boost for using a specialization of Callee with the
+ /// arguments listed in SpecializedArgNo replaced by constants.
+ int getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNo);
+
+ /// getSpecializationCost - The heuristic used to determine the code-size
+ /// impact of creating a specialized version of Callee with the
+ /// arguments listed in SpecializedArgNo replaced by constants.
+ InlineCost getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNo);
+
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
/// higher threshold to determine if the function call should be inlined.
float getInlineFudgeFactor(CallSite CS);
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a0e2ec7..b103897 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -312,6 +312,42 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline()
Metrics.containsIndirectBr);
}
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with the
+// arguments listed in SpecializedArgNos replaced by constants.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ if (Callee->mayBeOverridden())
+ return 0;
+
+ int Bonus = 0;
+ // If this function uses the coldcc calling convention, prefer not to
+ // specialize it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus -= InlineConstants::ColdccPenalty;
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+
+ for (unsigned i = 0, s = SpecializedArgNos.size();
+ i < s; ++i )
+ {
+ Bonus += CalleeFI->ArgumentWeights[SpecializedArgNos[i]].ConstantBonus;
+ }
+ // Calls usually take a long time, so they make the specialization gain
+ // smaller.
+ Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ return Bonus;
+}
+
+
// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
@@ -442,6 +478,40 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
return llvm::InlineCost::get(InlineCost);
}
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with the
+// arguments listed in SpecializedArgNos replaced by constants.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ // Don't specialize functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ int Cost = 0;
+
+ // Look at the original size of the callee. Each instruction counts as InstrCost.
+ Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+
+ // Offset that with the amount of code that can be constant-folded
+ // away with the given arguments replaced by constants.
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+ ae = SpecializedArgNos.end(); an != ae; ++an)
+ {
+ Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+ }
+
+ return llvm::InlineCost::get(Cost);
+}
+
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
// higher threshold to determine if the function call should be inlined.
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
index 756c9b3..037189e 100644
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -25,6 +25,7 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CallSite.h"
#include "llvm/ADT/DenseSet.h"
@@ -37,17 +38,12 @@ STATISTIC(numReplaced, "Number of callers replaced by specialization");
// Maximum number of arguments markable interested
static const int MaxInterests = 6;
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
namespace {
typedef SmallVector<int, MaxInterests> InterestingArgVector;
class PartSpec : public ModulePass {
void scanForInterest(Function&, InterestingArgVector&);
int scanDistribution(Function&, int, std::map<Constant*, int>&);
+ InlineCostAnalyzer CA;
public :
static char ID; // Pass identification, replacement for typeid
PartSpec() : ModulePass(ID) {}
@@ -79,6 +75,10 @@ SpecializeFunction(Function* F,
NF->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(NF);
+ // FIXME: Specialized versions getting the same constants should also get
+ // the same name. That way, specializations for public functions can be
+ // marked linkonce_odr and reused across modules.
+
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
ii != ee; ) {
Value::use_iterator i = ii;
@@ -144,22 +144,37 @@ bool PartSpec::runOnModule(Module &M) {
bool breakOuter = false;
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
std::map<Constant*, int> distribution;
- int total = scanDistribution(F, interestingArgs[x], distribution);
- if (total > CallsMin)
- for (std::map<Constant*, int>::iterator ii = distribution.begin(),
- ee = distribution.end(); ii != ee; ++ii)
- if (total > ii->second && ii->first &&
- ii->second > total * ConstValPercent) {
- ValueMap<const Value*, Value*> m;
- Function::arg_iterator arg = F.arg_begin();
- for (int y = 0; y < interestingArgs[x]; ++y)
- ++arg;
- m[&*arg] = ii->first;
- SpecializeFunction(&F, m);
- ++numSpecialized;
- breakOuter = true;
- Changed = true;
- }
+ scanDistribution(F, interestingArgs[x], distribution);
+ for (std::map<Constant*, int>::iterator ii = distribution.begin(),
+ ee = distribution.end(); ii != ee; ++ii) {
+ // The distribution map might have an entry for NULL (i.e., one or more
+ // callsites were passing a non-constant there). We allow that to
+ // happen so that we can see whether any callsites pass a non-constant;
+ // if none do and the function is internal, we might have an opportunity
+ // to kill the original function.
+ if (!ii->first) continue;
+ int bonus = ii->second;
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(interestingArgs[x]);
+ InlineCost cost = CA.getSpecializationCost(&F, argnos);
+ // FIXME: If this is the last constant entry, and no non-constant
+ // entries exist, and the target function is internal, the cost should
+ // be reduced by the original size of the target function, almost
+ // certainly making it negative and causing a specialization that will
+ // leave the original function dead and removable.
+ if (cost.isAlways() ||
+ (cost.isVariable() && cost.getValue() < bonus)) {
+ ValueMap<const Value*, Value*> m;
+ Function::arg_iterator arg = F.arg_begin();
+ for (int y = 0; y < interestingArgs[x]; ++y)
+ ++arg;
+ m[&*arg] = ii->first;
+ SpecializeFunction(&F, m);
+ ++numSpecialized;
+ breakOuter = true;
+ Changed = true;
+ }
+ }
}
}
return Changed;
@@ -170,28 +185,20 @@ bool PartSpec::runOnModule(Module &M) {
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
ii != ee; ++ii) {
- for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
- ui != ue; ++ui) {
-
- bool interesting = false;
- User *U = *ui;
- if (isa<CmpInst>(U)) interesting = true;
- else if (isa<CallInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(U)) interesting = true;
- else if (isa<BranchInst>(U)) interesting = true;
-
- if (interesting) {
- args.push_back(std::distance(F.arg_begin(), ii));
- break;
- }
+ int argno = std::distance(F.arg_begin(), ii);
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(argno);
+ int bonus = CA.getSpecializationBonus(&F, argnos);
+ if (bonus > 0) {
+ args.push_back(argno);
}
}
}
/// scanDistribution - Construct a histogram of constants for arg of F at arg.
+/// For each distinct constant, we'll compute the total of the specialization
+/// bonus across all callsites passing that constant; if that total exceeds
+/// the specialization cost, we will create the specialization.
int PartSpec::scanDistribution(Function& F, int arg,
std::map<Constant*, int>& dist) {
bool hasIndirect = false;
@@ -201,7 +208,10 @@ int PartSpec::scanDistribution(Function& F, int arg,
User *U = *ii;
CallSite CS(U);
if (CS && CS.getCalledFunction() == &F) {
- ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(arg);
+ dist[dyn_cast<Constant>(CS.getArgument(arg))] +=
+ CA.getSpecializationBonus(&F, argnos);
++total;
} else
hasIndirect = true;
diff --git a/test/Transforms/PartialSpecialize/heuristics.ll b/test/Transforms/PartialSpecialize/heuristics.ll
new file mode 100644
index 0000000..5ccf9ad
--- /dev/null
+++ b/test/Transforms/PartialSpecialize/heuristics.ll
@@ -0,0 +1,49 @@
+; If there are not enough callsites for a particular specialization to
+; justify its existence, the specialization shouldn't be created.
+;
+; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s
+declare void @callback1()
+declare void @callback2()
+
+declare void @othercall()
+
+define internal void @UseCallback(void()* %pCallback) {
+ call void %pCallback()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ call void @othercall()
+ ret void
+}
+
+define void @foo(void()* %pNonConstCallback)
+{
+Entry:
+; CHECK: Entry
+; CHECK-NOT: call void @UseCallback(void ()* @callback1)
+; CHECK: call void @UseCallback(void ()* @callback2)
+; CHECK-NEXT: call void @UseCallback(void ()* @callback2)
+; CHECK-NEXT: ret void
+ call void @UseCallback(void()* @callback1)
+ call void @UseCallback(void()* @callback1)
+ call void @UseCallback(void()* @callback1)
+ call void @UseCallback(void()* @callback1)
+ call void @UseCallback(void()* @callback2)
+ call void @UseCallback(void()* @callback2)
+
+ ret void
+}