diff options
| author | Stephen Hines <srhines@google.com> | 2013-01-21 13:15:17 -0800 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-01-21 13:15:17 -0800 |
| commit | 059800f9e3fee2852672f846d91a2da14da7783a (patch) | |
| tree | a6ef16b7263252ae1b8069295ea9cbbae0d9467d /lib/Analysis/InlineCost.cpp | |
| parent | cbefa15de4821975bb99fc6d74b3bdb42b2df45c (diff) | |
| parent | b6714227eda5d499f7667fc865f931126a8dc488 (diff) | |
| download | external_llvm-059800f9e3fee2852672f846d91a2da14da7783a.zip external_llvm-059800f9e3fee2852672f846d91a2da14da7783a.tar.gz external_llvm-059800f9e3fee2852672f846d91a2da14da7783a.tar.bz2 | |
Merge remote-tracking branch 'upstream/master' into merge-llvm
Conflicts:
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
lib/MC/MCAssembler.cpp
lib/Support/Atomic.cpp
lib/Support/Memory.cpp
lib/Target/ARM/ARMJITInfo.cpp
Change-Id: Ib339baf88df5b04870c8df1bedcfe1f877ccab8d
Diffstat (limited to 'lib/Analysis/InlineCost.cpp')
| -rw-r--r-- | lib/Analysis/InlineCost.cpp | 407 |
1 files changed, 297 insertions, 110 deletions
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 12be7fd..6e5c035 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -13,23 +13,23 @@ #define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CallingConv.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/GlobalAlias.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; @@ -41,19 +41,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; - // TargetData if available, or null. - const TargetData *const TD; + // DataLayout if available, or null. + const DataLayout *const TD; // The called function. Function &F; int Threshold; int Cost; - const bool AlwaysInline; - bool IsRecursive; + bool IsCallerRecursive; + bool IsRecursiveCall; bool ExposesReturnsTwice; bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; @@ -92,6 +96,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); // Custom analysis routines. @@ -120,14 +125,16 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); public: - CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) + CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), - IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), - NumInstructions(0), NumVectorInstructions(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), NumConstantPtrDiffs(0), @@ -269,9 +276,15 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // FIXME: Check whether inlining will turn a dynamic alloca into a static // alloca, and handle that case. - // We will happily inline static alloca instructions or dynamic alloca - // instructions in always-inline situations. - if (AlwaysInline || I.isStaticAlloca()) + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + + // We will happily inline static alloca instructions. + if (I.isStaticAlloca()) return Base::visitAlloca(I); // FIXME: This is overly conservative. Dynamic allocas are inefficient for @@ -345,7 +358,10 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { bool CallAnalyzer::visitBitCast(BitCastInst &I) { // Propagate constants through bitcasts. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -370,7 +386,10 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -403,7 +422,10 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -430,7 +452,10 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { bool CallAnalyzer::visitCastInst(CastInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -600,32 +625,109 @@ bool CallAnalyzer::visitStore(StoreInst &I) { return false; } +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. + Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector<Constant *, 4> ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast<Constant>(*I); + if (!C) + C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && - !F.hasFnAttr(Attribute::ReturnsTwice)) { + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; } + if (CS.isCall() && + cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: - return Base::visitCallSite(CS); + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - // SROA can usually chew through these intrinsics, but they aren't free. - return false; + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } } - } - if (Function *F = CS.getCalledFunction()) { if (F == CS.getInstruction()->getParent()->getParent()) { // This flag will fully abort the analysis, so don't bother with anything // else. - IsRecursive = true; + IsRecursiveCall = true; return false; } @@ -712,7 +814,14 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If the visit this instruction detected an uninlinable pattern, abort. - if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; if (NumVectorInstructions > NumInstructions/2) @@ -724,7 +833,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { // Check if we've past the threshold so we don't spin in huge basic // blocks that will never inline. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + if (Cost > (Threshold + VectorBonus)) return false; } @@ -775,7 +884,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by -/// some artifact of the rountine. +/// some artifact of the routine. bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; @@ -786,72 +895,89 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { int SingleBBBonus = Threshold / 2; Threshold += SingleBBBonus; - // Unless we are always-inlining, perform some tweaks to the cost and - // threshold based on the direct callsite information. - if (!AlwaysInline) { - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; - - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // TargetData. - NumStores = std::min(NumStores, 8U); - - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; } + } - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero cost. - if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { - if (isa<UnreachableInst>(II->getNormalDest()->begin())) - Threshold = 1; - } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction()))) + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) Threshold = 1; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) + Threshold = 1; - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; - // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) - return false; - } + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; if (F.empty()) return true; + Function *Caller = CS.getInstruction()->getParent()->getParent(); + // Check if the caller function is recursive itself. + for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); + U != E; ++U) { + CallSite Site(cast<Value>(*U)); + if (!Site) + continue; + Instruction *I = Site.getInstruction(); + if (I->getParent()->getParent() == Caller) { + IsCallerRecursive = true; + break; + } + } + // Track whether we've seen a return instruction. The first return // instruction is free, as at least one will usually disappear in inlining. bool HasReturn = false; @@ -895,7 +1021,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + if (Cost > (Threshold + VectorBonus)) break; BasicBlock *BB = BBWorklist[Idx]; @@ -908,9 +1034,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this indirect - // jump would jump from the inlined copy of the function into the original - // function which is extremely undefined behavior. + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions // with indirectbr's as long as no other function or global references the // blockaddress of a block within the current function. And as a QOI issue, @@ -928,8 +1054,16 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. if (!analyzeBlock(BB)) { - if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + break; } @@ -955,7 +1089,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // If we're unable to select a particular successor, just count all of // them. - for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; + ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); // If we had any successors at this point, than post-inlining is likely to @@ -969,12 +1104,18 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). + if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + Threshold += VectorBonus; - return AlwaysInline || Cost < Threshold; + return Cost < Threshold; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Dump stats about this call's analysis. void CallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" @@ -986,6 +1127,7 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); #undef DEBUG_PRINT_STAT } #endif @@ -996,14 +1138,30 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, int Threshold) { + // Cannot inline indirect calls. + if (!Callee) + return llvm::InlineCost::getNever(); + + // Calls to functions with always-inline attributes should be inlined + // whenever possible. + if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)) { + if (isInlineViable(*Callee)) + return llvm::InlineCost::getAlways(); + return llvm::InlineCost::getNever(); + } + // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. - if (!Callee || Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) + if (Callee->mayBeOverridden() || + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline) || + CS.isNoInline()) return llvm::InlineCost::getNever(); - DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "...\n"); CallAnalyzer CA(TD, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); @@ -1018,3 +1176,32 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } + +bool InlineCostAnalyzer::isInlineViable(Function &F) { + bool ReturnsTwice =F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. + if (isa<IndirectBrInst>(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. + if (!ReturnsTwice && CS.isCall() && + cast<CallInst>(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} |
