diff options
-rw-r--r-- | lib/Analysis/IPA/InlineCost.cpp | 113 | ||||
-rw-r--r-- | test/Transforms/Inline/invoke-cost.ll | 45 |
2 files changed, 121 insertions, 37 deletions
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 013691b..5e6d72d 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ExposesReturnsTwice; bool HasDynamicAlloca; bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitUnreachableInst(UnreachableInst &I); public: CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, @@ -139,12 +147,13 @@ public: : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return Base::visitCallSite(CS); } +bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { + // At least one return instruction will be free after inlining. + bool Free = !HasReturn; + HasReturn = true; + return Free; +} + +bool CallAnalyzer::visitBranchInst(BranchInst &BI) { + // We model unconditional branches as essentially free -- they really + // shouldn't exist at all, but handling them makes the behavior of the + // inliner more regular and predictable. Interestingly, conditional branches + // which will fold away are also free. + return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(BI.getCondition())); +} + +bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { + // We model unconditional switches as free, see the comments on handling + // branches. + return isa<ConstantInt>(SI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(SI.getCondition())); +} + +bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions with + // indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably don't + // want to inline this function. + HasIndirectBr = true; + return false; +} + +bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a resume instruction. + return false; +} + +bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { + // FIXME: It might be reasonably to discount the cost of instructions leading + // to unreachable as they have the lowest possible impact on both runtime and + // code size. + return true; // No actual code is needed for unreachable. +} + bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. @@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { ++NumInstructions; if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If the visit this instruction detected an uninlinable pattern, abort. - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining. - bool HasReturn = false; - // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (BB->empty()) continue; - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa<IndirectBrInst>(TI)) - return false; - - if (!HasReturn && isa<ReturnInst>(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { break; } + TerminatorInst *TI = BB->getTerminator(); + // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { diff --git a/test/Transforms/Inline/invoke-cost.ll b/test/Transforms/Inline/invoke-cost.ll new file mode 100644 index 0000000..84d33ad --- /dev/null +++ b/test/Transforms/Inline/invoke-cost.ll @@ -0,0 +1,45 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s + +target datalayout = "p:32:32" + +@glbl = external global i32 + +declare void @f() +declare i32 @__gxx_personality_v0(...) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @_ZSt9terminatev() + +define void @inner1() { +entry: + invoke void @f() to label %cont1 unwind label %terminate.lpad + +cont1: + invoke void @f() to label %cont2 unwind label %terminate.lpad + +cont2: + invoke void @f() to label %cont3 unwind label %terminate.lpad + +cont3: + invoke void @f() to label %cont4 unwind label %terminate.lpad + +cont4: + ret void + +terminate.lpad: + landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + catch i8* null + call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +define void @outer1() { +; CHECK-LABEL: @outer1( +; +; This call should not get inlined because inner1 actually calls a function +; many times, but it only does so through invoke as opposed to call. +; +; CHECK: call void @inner1 + call void @inner1() + ret void +} |