aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- lib/Transforms/Utils/InlineFunction.cpp | 47
-rw-r--r-- test/Transforms/Inline/byval.ll          | 23
2 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index bca9fc4..76fdd09 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- if (CalledFunc->onlyReadsMemory())
- return Arg;
+ const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // See if the argument is a (bitcasted) pointer to an alloca. If so, we can
+ // round up the alloca if needed.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) {
+ unsigned AIAlign = AI->getAlignment();
+
+ // If the alloca is known to be at least as aligned as the byval, we can do
+ // this optimization.
+ if (AIAlign >= ByValAlignment)
+ return Arg;
+
+ // If the alloca has a specified alignment that is less than the byval,
+ // then we can safely bump it up.
+ if (AIAlign) {
+ AI->setAlignment(ByValAlignment);
+ return Arg;
+ }
+
+ // If the alloca has an unspecified alignment, then we can only modify
+ // it if we have TD information. Doing so without TD info could end up
+ // with us rounding the alignment *down* accidentally, which is badness.
+ if (IFI.TD) {
+ AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy));
+ AI->setAlignment(AIAlign);
+ return Arg;
+ }
+ }
+
+ // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame.
+ }
LLVMContext &Context = Arg->getContext();
-
- const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
// Create the alloca. If we have TargetData, use nice alignment.
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll
index 08219fc..e601faf 100644
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -81,3 +81,26 @@ entry:
; CHECK: call void @g3(%struct.ss* %S1)
; CHECK: ret void
}
+
+
+; Inlining a byval struct should NOT cause an explicit copy
+; into an alloca if the function is readonly, but should increase an alloca's
+; alignment to satisfy an explicit alignment request.
+
+define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly {
+ call void @g3(%struct.ss* %b)
+ ret i32 4
+}
+
+define i32 @test4() nounwind {
+entry:
+ %S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4]
+ %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind
+ ret i32 %X
+; CHECK: @test4()
+; CHECK: %S = alloca %struct.ss, align 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @g3
+; CHECK: ret i32 4
+}
+