aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp113
-rw-r--r--test/Transforms/Inline/invoke-cost.ll45
2 files changed, 121 insertions, 37 deletions
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 013691b..5e6d72d 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this
+ // indirect jump would jump from the inlined copy of the function into the
+ // original function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+ // FIXME: It might be reasonably to discount the cost of instructions leading
+ // to unreachable as they have the lowest possible impact on both runtime and
+ // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
// If the visit this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
diff --git a/test/Transforms/Inline/invoke-cost.ll b/test/Transforms/Inline/invoke-cost.ll
new file mode 100644
index 0000000..84d33ad
--- /dev/null
+++ b/test/Transforms/Inline/invoke-cost.ll
@@ -0,0 +1,45 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+@glbl = external global i32
+
+declare void @f()
+declare i32 @__gxx_personality_v0(...)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare void @_ZSt9terminatev()
+
+define void @inner1() {
+entry:
+ invoke void @f() to label %cont1 unwind label %terminate.lpad
+
+cont1:
+ invoke void @f() to label %cont2 unwind label %terminate.lpad
+
+cont2:
+ invoke void @f() to label %cont3 unwind label %terminate.lpad
+
+cont3:
+ invoke void @f() to label %cont4 unwind label %terminate.lpad
+
+cont4:
+ ret void
+
+terminate.lpad:
+ landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ catch i8* null
+ call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+}
+
+define void @outer1() {
+; CHECK-LABEL: @outer1(
+;
+; This call should not get inlined because inner1 actually calls a function
+; many times, but it only does so through invoke as opposed to call.
+;
+; CHECK: call void @inner1
+ call void @inner1()
+ ret void
+}