diff options
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 18 | ||||
-rw-r--r-- | test/Transforms/Inline/alloca-merge-align-nodl.ll | 93 | ||||
-rw-r--r-- | test/Transforms/Inline/alloca-merge-align.ll | 94 |
3 files changed, 203 insertions, 2 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 663ddb7..f72121d 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -116,7 +116,8 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime) { + int InlineHistory, bool InsertLifetime, + const DataLayout *TD) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -189,6 +190,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, bool MergedAwayAlloca = false; for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { AllocaInst *AvailableAlloca = AllocasForType[i]; + + unsigned Align1 = AI->getAlignment(), + Align2 = AvailableAlloca->getAlignment(); + // If we don't have data layout information, and only one alloca is using + // the target default, then we can't safely merge them because we can't + // pick the greater alignment. + if (!TD && (!Align1 || !Align2) && Align1 != Align2) + continue; // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -206,6 +215,11 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, << *AvailableAlloca << '\n'); AI->replaceAllUsesWith(AvailableAlloca); + + if (Align1 > Align2 || (!Align1 && TD && + TD->getABITypeAlignment(AI->getAllocatedType()) > Align2)) + AvailableAlloca->setAlignment(Align1); + AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; @@ -482,7 +496,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime)) + InlineHistoryID, InsertLifetime, TD)) continue; ++NumInlined; diff --git a/test/Transforms/Inline/alloca-merge-align-nodl.ll b/test/Transforms/Inline/alloca-merge-align-nodl.ll new file mode 100644 index 0000000..203f52b --- /dev/null +++ b/test/Transforms/Inline/alloca-merge-align-nodl.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; This variant of the test has no data layout information. +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s = type { i32, i32 } + +define void @foo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 4 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +define void @foo0(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32] + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +declare void @bar(i32*) #1 + +define void @goo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 32 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +; CHECK-LABEL: @main +; CHECK: alloca [2 x i32], align 32 +; CHECK-NOT: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @main() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8, !tbaa !0 + call void @foo(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8, !tbaa !0 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} + +; CHECK-LABEL: @test0 +; CHECK: alloca [2 x i32], align 32 +; CHECK: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @test0() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8, !tbaa !0 + call void @foo0(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8, !tbaa !0 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} + diff --git a/test/Transforms/Inline/alloca-merge-align.ll b/test/Transforms/Inline/alloca-merge-align.ll new file mode 100644 index 0000000..de4707d --- /dev/null +++ b/test/Transforms/Inline/alloca-merge-align.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s = type { i32, i32 } + +define void @foo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 4 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +define void @foo0(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32] + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +declare void @bar(i32*) #1 + +define void @goo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 32 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) #2 + ret void +} + +; CHECK-LABEL: @main +; CHECK: alloca [2 x i32], align 32 +; CHECK-NOT: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @main() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8, !tbaa !0 + call void @foo(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8, !tbaa !0 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} + +; CHECK-LABEL: @test0 +; CHECK: alloca [2 x i32], align 32 +; CHECK-NOT: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @test0() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8, !tbaa !0 + call void @foo0(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8, !tbaa !0 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} + |