diff options
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 47 | ||||
-rw-r--r-- | test/Transforms/Inline/byval.ll | 23 |
2 files changed, 66 insertions, 4 deletions
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index bca9fc4..76fdd09 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - if (CalledFunc->onlyReadsMemory()) - return Arg; + const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // See if the argument is a (bitcasted) pointer to an alloca. If so, we can + // round up the alloca if needed. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) { + unsigned AIAlign = AI->getAlignment(); + + // If the alloca is known at least aligned as much as the byval, we can do + // this optimization. + if (AIAlign >= ByValAlignment) + return Arg; + + // If the alloca has a specified alignment that is less than the byval, + // then we can safely bump it up. + if (AIAlign) { + AI->setAlignment(ByValAlignment); + return Arg; + } + + // If the alignment has an unspecified alignment, then we can only modify + // it if we have TD information. Doing so without TD info could end up + // with us rounding the alignment *down* accidentally, which is badness. + if (IFI.TD) { + AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy)); + AI->setAlignment(AIAlign); + return Arg; + } + } + + // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame. + } LLVMContext &Context = Arg->getContext(); - - const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll index 08219fc..e601faf 100644 --- a/test/Transforms/Inline/byval.ll +++ b/test/Transforms/Inline/byval.ll @@ -81,3 +81,26 @@ entry: ; CHECK: call void @g3(%struct.ss* %S1) ; CHECK: ret void } + + +; Inlining a byval struct should NOT cause an explicit copy +; into an alloca if the function is readonly, but should increase an alloca's +; alignment to satisfy an explicit alignment request. + +define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly { + call void @g3(%struct.ss* %b) + ret i32 4 +} + +define i32 @test4() nounwind { +entry: + %S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4] + %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind + ret i32 %X +; CHECK: @test4() +; CHECK: %S = alloca %struct.ss, align 64 +; CHECK-NOT: call void @llvm.memcpy +; CHECK: call void @g3 +; CHECK: ret i32 4 +} + |