From 6fb881c036c32728c4a128d81b6083457e534e09 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Wed, 17 Nov 2010 11:16:23 +0000 Subject: Have InlineFunction use SimplifyInstruction rather than hasConstantValue. I was leery of using SimplifyInstruction while the IR was still in a half-baked state, which is the reason for delaying the simplification until the IR is fully cooked. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119494 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index e09666a..f57fec7 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -22,6 +22,7 @@ #include "llvm/Attributes.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -579,10 +580,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // any users of the original call/invoke instruction. const Type *RTy = CalledFunc->getReturnType(); + PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. - PHINode *PHI = 0; if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, TheCall->getName(), AfterCallBB->begin()); @@ -600,14 +601,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } - - // Now that we inserted the PHI, check to see if it has a single value - // (e.g. all the entries are the same or undef). If so, remove the PHI so - // it doesn't block other optimizations. - if (Value *V = PHI->hasConstantValue()) { - PHI->replaceAllUsesWith(V); - PHI->eraseFromParent(); - } } @@ -664,5 +657,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); + // If we inserted a phi node, check to see if it has a single value (e.g. all + // the entries are the same or undef). If so, remove the PHI so it doesn't + // block other optimizations. + if (PHI) + if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { + PHI->replaceAllUsesWith(V); + PHI->eraseFromParent(); + } + return true; } -- cgit v1.1 From f601d6df6f43bb833461cbcee475c36998e6c259 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 20 Nov 2010 18:43:35 +0000 Subject: Simplify code. No change in functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119908 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f57fec7..d60f908 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -338,7 +338,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Value *CallArgs[] = { DestCast, SrcCast, Size, ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::get(Type::getInt1Ty(Context), 0) + ConstantInt::getFalse(Context) // isVolatile }; CallInst *TheMemCpy = CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); -- cgit v1.1 From 39add23dc54a1580983b1901384688d6622daa3b Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 6 Dec 2010 07:43:04 +0000 Subject: improve comment git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index d60f908..5cf0fed 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -400,8 +400,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { if (!isa(AI->getArraySize())) continue; - // Keep track of the static allocas that we inline into the caller if the - // StaticAllocas pointer is non-null. + // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them -- cgit v1.1 From 018fb767b91d5ec396d56a3e61bd124918b59fa1 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Dec 2010 07:45:28 +0000 Subject: fix PR8769, a miscompilation by inliner when inlining a function with a byval argument. The generated alloca has to have at least the alignment of the byval, if not, the client may be making assumptions that the new alloca won't satisfy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122234 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5cf0fed..3f50ccd 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -252,7 +252,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; - // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = @@ -308,14 +307,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast(I->getType())->getElementType(); - const Type *VoidPtrTy = - Type::getInt8PtrTy(Context); + const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; - if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, - I->getName(), + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, CalledFunc->getParamAlignment(ArgNo+1)); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), &*Caller->begin()->begin()); // Emit a memcpy. const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; -- cgit v1.1 From e7ae705c32906979a527926864345016e76867b9 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Dec 2010 07:57:41 +0000 Subject: pull byval processing out to its own helper function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 128 ++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 56 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 3f50ccd..bca9fc4 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -229,6 +229,71 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + if (CalledFunc->onlyReadsMemory()) + return Arg; + + LLVMContext &Context = Arg->getContext(); + + + const Type *AggTy = cast(Arg->getType())->getElementType(); + const Type *VoidPtrTy = Type::getInt8PtrTy(Context); + + // Create the alloca. If we have TargetData, use nice alignment. + unsigned Align = 1; + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Function *Caller = TheCall->getParent()->getParent(); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + &*Caller->begin()->begin()); + // Emit a memcpy. + const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys, 3); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (IFI.TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DestCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + CallInst *TheMemCpy = + CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); + + // If we have a call graph, update it. + if (CallGraph *CG = IFI.CG) { + CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); + CallGraphNode *CallerNode = (*CG)[Caller]; + CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); + } + + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +} + // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -304,63 +369,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && - !CalledFunc->onlyReadsMemory()) { - const Type *AggTy = cast(I->getType())->getElementType(); - const Type *VoidPtrTy = Type::getInt8PtrTy(Context); - - // Create the alloca. If we have TargetData, use nice alignment. - unsigned Align = 1; - if (IFI.TD) - Align = IFI.TD->getPrefTypeAlignment(AggTy); - - // If the byval had an alignment specified, we *must* use at least that - // alignment, as it is required by the byval argument (and uses of the - // pointer inside the callee). - Align = std::max(Align, CalledFunc->getParamAlignment(ArgNo+1)); - - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), - &*Caller->begin()->begin()); - // Emit a memcpy. - const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; - Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), - Intrinsic::memcpy, - Tys, 3); - Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); - - Value *Size; - if (IFI.TD == 0) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.TD->getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. - Value *CallArgs[] = { - DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - CallInst *TheMemCpy = - CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); - - // If we have a call graph, update it. - if (CallGraph *CG = IFI.CG) { - CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); - CallGraphNode *CallerNode = (*CG)[Caller]; - CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); - } - - // Uses of the argument in the function should use our new alloca - // instead. - ActualArg = NewAlloca; - + if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + CalledFunc->getParamAlignment(ArgNo+1)); + // Calls that we inline may use the new alloca, so we need to clear - // their 'tail' flags. - MustClearTailCallFlags = true; + // their 'tail' flags if HandleByValArgument introduced a new alloca and + // the callee has calls. + MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; -- cgit v1.1 From 0b66f63a26387f5c0360a4324fc3c31e0599a6e0 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Dec 2010 08:10:40 +0000 Subject: when eliding a byval copy due to inlining a readonly function, we have to make sure that the reused alloca has sufficient alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122236 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 47 ++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index bca9fc4..76fdd09 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - if (CalledFunc->onlyReadsMemory()) - return Arg; + const Type *AggTy = cast(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // See if the argument is a (bitcasted) pointer to an alloca. If so, we can + // round up the alloca if needed. + if (AllocaInst *AI = dyn_cast(Arg->stripPointerCasts())) { + unsigned AIAlign = AI->getAlignment(); + + // If the alloca is known at least aligned as much as the byval, we can do + // this optimization. + if (AIAlign >= ByValAlignment) + return Arg; + + // If the alloca has a specified alignment that is less than the byval, + // then we can safely bump it up. + if (AIAlign) { + AI->setAlignment(ByValAlignment); + return Arg; + } + + // If the alignment has an unspecified alignment, then we can only modify + // it if we have TD information. Doing so without TD info could end up + // with us rounding the alignment *down* accidentally, which is badness. + if (IFI.TD) { + AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy)); + AI->setAlignment(AIAlign); + return Arg; + } + } + + // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame. + } LLVMContext &Context = Arg->getContext(); - - const Type *AggTy = cast(Arg->getType())->getElementType(); const Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. -- cgit v1.1 From 7569d79de1bd97a6a17b81340ea6ce97d8a3c279 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 25 Dec 2010 20:42:38 +0000 Subject: switch the inliner alignment enforcement stuff to use the getOrEnforceKnownAlignment function, which simplifies the code and makes it stronger. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 35 ++++++++------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'lib/Transforms/Utils/InlineFunction.cpp') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 76fdd09..c1faf24 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CallSite.h" @@ -247,34 +248,14 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. return Arg; - // See if the argument is a (bitcasted) pointer to an alloca. If so, we can - // round up the alloca if needed. - if (AllocaInst *AI = dyn_cast(Arg->stripPointerCasts())) { - unsigned AIAlign = AI->getAlignment(); - - // If the alloca is known at least aligned as much as the byval, we can do - // this optimization. - if (AIAlign >= ByValAlignment) - return Arg; - - // If the alloca has a specified alignment that is less than the byval, - // then we can safely bump it up. - if (AIAlign) { - AI->setAlignment(ByValAlignment); - return Arg; - } - - // If the alignment has an unspecified alignment, then we can only modify - // it if we have TD information. Doing so without TD info could end up - // with us rounding the alignment *down* accidentally, which is badness. - if (IFI.TD) { - AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy)); - AI->setAlignment(AIAlign); - return Arg; - } - } + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, + IFI.TD) >= ByValAlignment) + return Arg; - // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame. + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. } LLVMContext &Context = Arg->getContext(); -- cgit v1.1