diff options
-rw-r--r-- | lib/Analysis/InlineCost.cpp | 76 | ||||
-rw-r--r-- | test/Transforms/Inline/alloca-bonus.ll | 44 |
2 files changed, 104 insertions, 16 deletions
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 226b473..fb5861c 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -221,23 +221,67 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *I = cast<Instruction>(*UI); - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - Reduction += InlineConstants::InstrCost; - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has variable indices, we won't be able to do much with it. - if (GEP->hasAllConstantIndices()) - Reduction += CountCodeReductionForAlloca(GEP); - } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { - // Track pointer through bitcasts. - Reduction += CountCodeReductionForAlloca(BCI); - } else { - // If there is some other strange instruction, we're not going to be able - // to do much if we inline this. - return 0; + + SmallVector<Value *, 4> Worklist; + Worklist.push_back(V); + do { + Value *V = Worklist.pop_back_val(); + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI){ + Instruction *I = cast<Instruction>(*UI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isSimple()) + return 0; + Reduction += InlineConstants::InstrCost; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!SI->isSimple()) + return 0; + Reduction += InlineConstants::InstrCost; + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (!GEP->hasAllConstantIndices()) + return 0; + // A non-zero GEP will likely become a mask operation after SROA. + if (GEP->hasAllZeroIndices()) + Reduction += InlineConstants::InstrCost; + Worklist.push_back(GEP); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + // Track pointer through bitcasts. + Worklist.push_back(BCI); + Reduction += InlineConstants::InstrCost; + } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + // SROA can handle a select of alloca iff all uses of the alloca are + // loads, and dereferenceable. We assume it's dereferenceable since + // we're told the input is an alloca. + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || !LI->isSimple()) return 0; + } + // We don't know whether we'll be deleting the rest of the chain of + // instructions from the SelectInst on, because we don't know whether + // the other side of the select is also an alloca or not. + continue; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return 0; + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // SROA can usually chew through these intrinsics. + Reduction += InlineConstants::InstrCost; + break; + } + } else { + // If there is some other strange instruction, we're not going to be + // able to do much if we inline this. + return 0; + } } - } + } while (!Worklist.empty()); return Reduction; } diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll new file mode 100644 index 0000000..2587ae1 --- /dev/null +++ b/test/Transforms/Inline/alloca-bonus.ll @@ -0,0 +1,44 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s + +declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) + +@glbl = external global i32 + +define void @outer1() { +; CHECK: @outer1 +; CHECK-NOT: call void @inner1 + %ptr = alloca i32 + call void @inner1(i32* %ptr) + ret void +} + +define void @inner1(i32 *%ptr) { + %A = load i32* %ptr + store i32 0, i32* %ptr + %C = getelementptr i32* %ptr, i32 0 + %D = getelementptr i32* %ptr, i32 1 + %E = bitcast i32* %ptr to i8* + %F = select i1 false, i32* %ptr, i32* @glbl + call void @llvm.lifetime.start(i64 0, i8* %E) + ret void +} + +define void @outer2() { +; CHECK: @outer2 +; CHECK: call void @inner2 + %ptr = alloca i32 + call void @inner2(i32* %ptr) + ret void +} + +; %D poisons this call, scalar-repl can't handle that instruction. +define void @inner2(i32 *%ptr) { + %A = load i32* %ptr + store i32 0, i32* %ptr + %C = getelementptr i32* %ptr, i32 0 + %D = getelementptr i32* %ptr, i32 %A + %E = bitcast i32* %ptr to i8* + %F = select i1 false, i32* %ptr, i32* @glbl + call void @llvm.lifetime.start(i64 0, i8* %E) + ret void +} |