aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Analysis/InlineCost.cpp76
-rw-r--r--test/Transforms/Inline/alloca-bonus.ll44
2 files changed, 104 insertions, 16 deletions
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 226b473..fb5861c 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -221,23 +221,67 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
if (!V->getType()->isPointerTy()) return 0; // Not a pointer
unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *I = cast<Instruction>(*UI);
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- Reduction += InlineConstants::InstrCost;
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (GEP->hasAllConstantIndices())
- Reduction += CountCodeReductionForAlloca(GEP);
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Reduction += CountCodeReductionForAlloca(BCI);
- } else {
- // If there is some other strange instruction, we're not going to be able
- // to do much if we inline this.
- return 0;
+
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ Value *V = Worklist.pop_back_val();
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isSimple())
+ return 0;
+ Reduction += InlineConstants::InstrCost;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!SI->isSimple())
+ return 0;
+ Reduction += InlineConstants::InstrCost;
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (!GEP->hasAllConstantIndices())
+ return 0;
+ // A non-zero GEP will likely become a mask operation after SROA.
+ if (GEP->hasAllZeroIndices())
+ Reduction += InlineConstants::InstrCost;
+ Worklist.push_back(GEP);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Worklist.push_back(BCI);
+ Reduction += InlineConstants::InstrCost;
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // SROA can handle a select of alloca iff all uses of the alloca are
+ // loads, and dereferenceable. We assume it's dereferenceable since
+ // we're told the input is an alloca.
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return 0;
+ }
+ // We don't know whether we'll be deleting the rest of the chain of
+ // instructions from the SelectInst on, because we don't know whether
+ // the other side of the select is also an alloca or not.
+ continue;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return 0;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // SROA can usually chew through these intrinsics.
+ Reduction += InlineConstants::InstrCost;
+ break;
+ }
+ } else {
+ // If there is some other strange instruction, we're not going to be
+ // able to do much if we inline this.
+ return 0;
+ }
}
- }
+ } while (!Worklist.empty());
return Reduction;
}
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 0000000..2587ae1
--- /dev/null
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,44 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+ %ptr = alloca i32
+ call void @inner1(i32* %ptr)
+ ret void
+}
+
+define void @inner1(i32 *%ptr) {
+ %A = load i32* %ptr
+ store i32 0, i32* %ptr
+ %C = getelementptr i32* %ptr, i32 0
+ %D = getelementptr i32* %ptr, i32 1
+ %E = bitcast i32* %ptr to i8*
+ %F = select i1 false, i32* %ptr, i32* @glbl
+ call void @llvm.lifetime.start(i64 0, i8* %E)
+ ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+ %ptr = alloca i32
+ call void @inner2(i32* %ptr)
+ ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+ %A = load i32* %ptr
+ store i32 0, i32* %ptr
+ %C = getelementptr i32* %ptr, i32 0
+ %D = getelementptr i32* %ptr, i32 %A
+ %E = bitcast i32* %ptr to i8*
+ %F = select i1 false, i32* %ptr, i32* @glbl
+ call void @llvm.lifetime.start(i64 0, i8* %E)
+ ret void
+}