diff options
| | | |
|---|---|---|
| author | Dale Johannesen <dalej@apple.com> | 2009-01-12 22:11:50 +0000 |
| committer | Dale Johannesen <dalej@apple.com> | 2009-01-12 22:11:50 +0000 |
| commit | 5adaa754419fc9ee5d2f768ea848d594d1e97fe0 (patch) | |
| tree | 1645efb483d9e0820b3f8cff1a9e3e6065a48d77 | |
| parent | 896b0f24b62a729616620e7258c883f6e76b5be2 (diff) | |
| download | external_llvm-5adaa754419fc9ee5d2f768ea848d594d1e97fe0.zip external_llvm-5adaa754419fc9ee5d2f768ea848d594d1e97fe0.tar.gz external_llvm-5adaa754419fc9ee5d2f768ea848d594d1e97fe0.tar.bz2 | |
Enable recursive inlining. Reduce inlining threshold
back to 200; 400 seems to be too high, loses more than
it gains.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62107 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 10 |
-rw-r--r-- | lib/Transforms/Utils/InlineCost.cpp | 4 |
-rw-r--r-- | test/Transforms/Inline/2009-01-12-RecursiveInline.ll | 92 |
3 files changed, 98 insertions, 8 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 65c0ace..8d40c9b 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -31,8 +31,8 @@ STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(400), - cl::desc("Control the amount of inlining to perform (default = 400)")); +InlineLimit("inline-threshold", cl::Hidden, cl::init(200), + cl::desc("Control the amount of inlining to perform (default = 200)")); Inliner::Inliner(void *ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} @@ -168,8 +168,7 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) if (Function *Callee = CallSites[CSi].getCalledFunction()) { // Calls to external functions are never inlinable. - if (Callee->isDeclaration() || - CallSites[CSi].getInstruction()->getParent()->getParent() ==Callee){ + if (Callee->isDeclaration()) { if (SCC.size() == 1) { std::swap(CallSites[CSi], CallSites.back()); CallSites.pop_back(); @@ -190,7 +189,8 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { if (InlineCallIfPossible(CS, CG, SCCFunctions, getAnalysis<TargetData>())) { // Remove any cached cost info for this caller, as inlining the callee - // has increased the size of the caller. + // has increased the size of the caller (which may be the same as the + // callee). resetCachedCostInfo(Caller); // Remove this call site from the list. 
If possible, use diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp index 90d72ef..97f0bf8 100644 --- a/lib/Transforms/Utils/InlineCost.cpp +++ b/lib/Transforms/Utils/InlineCost.cpp @@ -180,14 +180,12 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee = CS.getCalledFunction(); Function *Caller = TheCall->getParent()->getParent(); - // Don't inline a directly recursive call. - if (Caller == Callee || // Don't inline functions which can be redefined at link-time to mean // something else. // FIXME: We allow link-once linkage since in practice all versions of // the function have the same body (C++ ODR) - but the LLVM definition // of LinkOnceLinkage doesn't require this. - (Callee->mayBeOverridden() && !Callee->hasLinkOnceLinkage()) || + if ((Callee->mayBeOverridden() && !Callee->hasLinkOnceLinkage()) || // Don't inline functions marked noinline. Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee)) return llvm::InlineCost::getNever(); diff --git a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll new file mode 100644 index 0000000..8a4b2e0 --- /dev/null +++ b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll @@ -0,0 +1,92 @@ +; RUN: llvm-as < %s | opt -inline | llvm-dis | grep {call.*fib} | count 4 +; First call to fib from fib is inlined, producing 2 instead of 1, total 3. +; Second call to fib from fib is not inlined because new body of fib exceeds +; inlining limit of 200. Plus call in main = 4 total. 
+ +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" +@"\01LC" = internal constant [5 x i8] c"%ld\0A\00" ; <[5 x i8]*> [#uses=1] + +define i32 @fib(i32 %n) nounwind { +entry: + %n_addr = alloca i32 ; <i32*> [#uses=4] + %retval = alloca i32 ; <i32*> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %n, i32* %n_addr + %1 = load i32* %n_addr, align 4 ; <i32> [#uses=1] + %2 = icmp ule i32 %1, 1 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb1 + +bb: ; preds = %entry + store i32 1, i32* %0, align 4 + br label %bb2 + +bb1: ; preds = %entry + %3 = load i32* %n_addr, align 4 ; <i32> [#uses=1] + %4 = sub i32 %3, 2 ; <i32> [#uses=1] + %5 = call i32 @fib(i32 %4) nounwind ; <i32> [#uses=1] + %6 = load i32* %n_addr, align 4 ; <i32> [#uses=1] + %7 = sub i32 %6, 1 ; <i32> [#uses=1] + %8 = call i32 @fib(i32 %7) nounwind ; <i32> [#uses=1] + %9 = add i32 %5, %8 ; <i32> [#uses=1] + store i32 %9, i32* %0, align 4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %10 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %10, i32* %retval, align 4 + br label %return + +return: ; preds = %bb2 + %retval3 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval3 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %argc_addr = alloca i32 ; <i32*> [#uses=2] + %argv_addr = alloca i8** ; <i8***> [#uses=2] + %retval = alloca i32 ; <i32*> [#uses=2] + %N = alloca i32 ; <i32*> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %iftmp.0 = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 2 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb1 + +bb: ; preds = %entry + %3 = load i8*** 
%argv_addr, align 4 ; <i8**> [#uses=1] + %4 = getelementptr i8** %3, i32 1 ; <i8**> [#uses=1] + %5 = load i8** %4, align 4 ; <i8*> [#uses=1] + %6 = call i32 @atoi(i8* %5) nounwind ; <i32> [#uses=1] + store i32 %6, i32* %iftmp.0, align 4 + br label %bb2 + +bb1: ; preds = %entry + store i32 43, i32* %iftmp.0, align 4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %7 = load i32* %iftmp.0, align 4 ; <i32> [#uses=1] + store i32 %7, i32* %N, align 4 + %8 = load i32* %N, align 4 ; <i32> [#uses=1] + %9 = call i32 @fib(i32 %8) nounwind ; <i32> [#uses=1] + %10 = call i32 (i8*, ...)* @printf(i8* getelementptr ([5 x i8]* @"\01LC", i32 0, i32 0), i32 %9) nounwind ; <i32> [#uses=0] + store i32 0, i32* %0, align 4 + %11 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %11, i32* %retval, align 4 + br label %return + +return: ; preds = %bb2 + %retval3 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval3 +} + +declare i32 @atoi(i8*) + +declare i32 @printf(i8*, ...) nounwind |