From 6fb881c036c32728c4a128d81b6083457e534e09 Mon Sep 17 00:00:00 2001
From: Duncan Sands <baldrick@free.fr>
Date: Wed, 17 Nov 2010 11:16:23 +0000
Subject: Have InlineFunction use SimplifyInstruction rather than
 hasConstantValue.  I was leery of using SimplifyInstruction while the IR was
 still in a half-baked state, which is the reason for delaying the
 simplification until the IR is fully cooked.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119494 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index e09666a..f57fec7 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Attributes.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -579,10 +580,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // any users of the original call/invoke instruction.
   const Type *RTy = CalledFunc->getReturnType();
 
+  PHINode *PHI = 0;
   if (Returns.size() > 1) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
-    PHINode *PHI = 0;
     if (!TheCall->use_empty()) {
       PHI = PHINode::Create(RTy, TheCall->getName(),
                             AfterCallBB->begin());
@@ -600,14 +601,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
                "Ret value not consistent in function!");
         PHI->addIncoming(RI->getReturnValue(), RI->getParent());
       }
-    
-      // Now that we inserted the PHI, check to see if it has a single value
-      // (e.g. all the entries are the same or undef).  If so, remove the PHI so
-      // it doesn't block other optimizations.
-      if (Value *V = PHI->hasConstantValue()) {
-        PHI->replaceAllUsesWith(V);
-        PHI->eraseFromParent();
-      }
     }
 
 
@@ -664,5 +657,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // Now we can remove the CalleeEntry block, which is now empty.
   Caller->getBasicBlockList().erase(CalleeEntry);
 
+  // If we inserted a phi node, check to see if it has a single value (e.g. all
+  // the entries are the same or undef).  If so, remove the PHI so it doesn't
+  // block other optimizations.
+  if (PHI)
+    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+      PHI->replaceAllUsesWith(V);
+      PHI->eraseFromParent();
+    }
+
   return true;
 }
-- 
cgit v1.1


From f601d6df6f43bb833461cbcee475c36998e6c259 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Sat, 20 Nov 2010 18:43:35 +0000
Subject: Simplify code. No change in functionality.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119908 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index f57fec7..d60f908 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -338,7 +338,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
         Value *CallArgs[] = {
           DestCast, SrcCast, Size,
           ConstantInt::get(Type::getInt32Ty(Context), 1),
-          ConstantInt::get(Type::getInt1Ty(Context), 0)
+          ConstantInt::getFalse(Context) // isVolatile
         };
         CallInst *TheMemCpy =
           CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-- 
cgit v1.1


From 39add23dc54a1580983b1901384688d6622daa3b Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 6 Dec 2010 07:43:04 +0000
Subject: improve comment

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120994 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index d60f908..5cf0fed 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -400,8 +400,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       if (!isa<Constant>(AI->getArraySize()))
         continue;
       
-      // Keep track of the static allocas that we inline into the caller if the
-      // StaticAllocas pointer is non-null.
+      // Keep track of the static allocas that we inline into the caller.
       IFI.StaticAllocas.push_back(AI);
       
       // Scan for the block of allocas that we can move over, and move them
-- 
cgit v1.1


From 018fb767b91d5ec396d56a3e61bd124918b59fa1 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 20 Dec 2010 07:45:28 +0000
Subject: fix PR8769, a miscompilation by inliner when inlining a function with
 a byval argument.  The generated alloca has to have at least the alignment of
 the byval, if not, the client may be making assumptions that the new alloca
 won't satisfy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5cf0fed..3f50ccd 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -252,7 +252,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       CalledFunc->isDeclaration() || // call, or call to a vararg function!
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
-
   // If the call to the callee is not a tail call, we must clear the 'tail'
   // flags on any calls that we inline.
   bool MustClearTailCallFlags =
@@ -308,14 +307,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
           !CalledFunc->onlyReadsMemory()) {
         const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = 
-            Type::getInt8PtrTy(Context);
+        const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
 
         // Create the alloca.  If we have TargetData, use nice alignment.
         unsigned Align = 1;
-        if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, 
-                                          I->getName(), 
+        if (IFI.TD)
+          Align = IFI.TD->getPrefTypeAlignment(AggTy);
+        
+        // If the byval had an alignment specified, we *must* use at least that
+        // alignment, as it is required by the byval argument (and uses of the
+        // pointer inside the callee).
+        Align = std::max(Align, CalledFunc->getParamAlignment(ArgNo+1));
+        
+        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), 
                                           &*Caller->begin()->begin());
         // Emit a memcpy.
         const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-- 
cgit v1.1


From e7ae705c32906979a527926864345016e76867b9 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 20 Dec 2010 07:57:41 +0000
Subject: pull byval processing out to its own helper function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122235 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 128 ++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 56 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 3f50ccd..bca9fc4 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -229,6 +229,71 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+                                  const Function *CalledFunc,
+                                  InlineFunctionInfo &IFI,
+                                  unsigned ByValAlignment) {
+  if (CalledFunc->onlyReadsMemory())
+    return Arg;
+  
+  LLVMContext &Context = Arg->getContext();
+
+  
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  
+  // Create the alloca.  If we have TargetData, use nice alignment.
+  unsigned Align = 1;
+  if (IFI.TD)
+    Align = IFI.TD->getPrefTypeAlignment(AggTy);
+  
+  // If the byval had an alignment specified, we *must* use at least that
+  // alignment, as it is required by the byval argument (and uses of the
+  // pointer inside the callee).
+  Align = std::max(Align, ByValAlignment);
+  
+  Function *Caller = TheCall->getParent()->getParent(); 
+  
+  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
+                                    &*Caller->begin()->begin());
+  // Emit a memcpy.
+  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+                                                 Intrinsic::memcpy, 
+                                                 Tys, 3);
+  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+  
+  Value *Size;
+  if (IFI.TD == 0)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.TD->getTypeStoreSize(AggTy));
+  
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer.  Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DestCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  CallInst *TheMemCpy =
+    CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+  
+  // If we have a call graph, update it.
+  if (CallGraph *CG = IFI.CG) {
+    CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+    CallGraphNode *CallerNode = (*CG)[Caller];
+    CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+  }
+  
+  // Uses of the argument in the function should use our new alloca
+  // instead.
+  return NewAlloca;
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -304,63 +369,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       // by them explicit.  However, we don't do this if the callee is readonly
       // or readnone, because the copy would be unneeded: the callee doesn't
       // modify the struct.
-      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
-          !CalledFunc->onlyReadsMemory()) {
-        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
-        // Create the alloca.  If we have TargetData, use nice alignment.
-        unsigned Align = 1;
-        if (IFI.TD)
-          Align = IFI.TD->getPrefTypeAlignment(AggTy);
-        
-        // If the byval had an alignment specified, we *must* use at least that
-        // alignment, as it is required by the byval argument (and uses of the
-        // pointer inside the callee).
-        Align = std::max(Align, CalledFunc->getParamAlignment(ArgNo+1));
-        
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), 
-                                          &*Caller->begin()->begin());
-        // Emit a memcpy.
-        const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                       Intrinsic::memcpy, 
-                                                       Tys, 3);
-        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
-        Value *Size;
-        if (IFI.TD == 0)
-          Size = ConstantExpr::getSizeOf(AggTy);
-        else
-          Size = ConstantInt::get(Type::getInt64Ty(Context),
-                                  IFI.TD->getTypeStoreSize(AggTy));
-
-        // Always generate a memcpy of alignment 1 here because we don't know
-        // the alignment of the src pointer.  Other optimizations can infer
-        // better alignment.
-        Value *CallArgs[] = {
-          DestCast, SrcCast, Size,
-          ConstantInt::get(Type::getInt32Ty(Context), 1),
-          ConstantInt::getFalse(Context) // isVolatile
-        };
-        CallInst *TheMemCpy =
-          CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
-        // If we have a call graph, update it.
-        if (CallGraph *CG = IFI.CG) {
-          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
-          CallGraphNode *CallerNode = (*CG)[Caller];
-          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
-        }
-
-        // Uses of the argument in the function should use our new alloca
-        // instead.
-        ActualArg = NewAlloca;
-
+      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+                                        CalledFunc->getParamAlignment(ArgNo+1));
+ 
         // Calls that we inline may use the new alloca, so we need to clear
-        // their 'tail' flags.
-        MustClearTailCallFlags = true;
+        // their 'tail' flags if HandleByValArgument introduced a new alloca and
+        // the callee has calls.
+        MustClearTailCallFlags |= ActualArg != *AI;
       }
 
       VMap[I] = ActualArg;
-- 
cgit v1.1


From 0b66f63a26387f5c0360a4324fc3c31e0599a6e0 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 20 Dec 2010 08:10:40 +0000
Subject: when eliding a byval copy due to inlining a readonly function, we
 have to make sure that the reused alloca has sufficient alignment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122236 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 47 ++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index bca9fc4..76fdd09 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                   const Function *CalledFunc,
                                   InlineFunctionInfo &IFI,
                                   unsigned ByValAlignment) {
-  if (CalledFunc->onlyReadsMemory())
-    return Arg;
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+  // If the called function is readonly, then it could not mutate the caller's
+  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
+  // temporary.
+  if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument has a specified alignment that is greater than the
+    // passed in pointer, then we either have to round up the input pointer or
+    // give up on this transformation.
+    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
+      return Arg;
+
+    // See if the argument is a (bitcasted) pointer to an alloca.  If so, we can
+    // round up the alloca if needed.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) {
+      unsigned AIAlign = AI->getAlignment();
+      
+      // If the alloca is known at least aligned as much as the byval, we can do
+      // this optimization.
+      if (AIAlign >= ByValAlignment)
+        return Arg;
+      
+      // If the alloca has a specified alignment that is less than the byval,
+      // then we can safely bump it up.
+      if (AIAlign) {
+        AI->setAlignment(ByValAlignment);
+        return Arg;
+      }
+      
+      // If the alignment has an unspecified alignment, then we can only modify
+      // it if we have TD information.  Doing so without TD info could end up
+      // with us rounding the alignment *down* accidentally, which is badness.
+      if (IFI.TD) {
+        AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy));
+        AI->setAlignment(AIAlign);
+        return Arg;
+      }
+    }
+    
+    // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame.
+  }
   
   LLVMContext &Context = Arg->getContext();
 
-  
-  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
   const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
   
   // Create the alloca.  If we have TargetData, use nice alignment.
-- 
cgit v1.1


From 7569d79de1bd97a6a17b81340ea6ce97d8a3c279 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sat, 25 Dec 2010 20:42:38 +0000
Subject: switch the inliner alignment enforcement stuff to use the
 getOrEnforceKnownAlignment function, which simplifies the code and makes it
 stronger.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122555 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/InlineFunction.cpp | 35 ++++++++-------------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

(limited to 'lib/Transforms/Utils/InlineFunction.cpp')

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 76fdd09..c1faf24 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CallSite.h"
@@ -247,34 +248,14 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
     if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
       return Arg;
 
-    // See if the argument is a (bitcasted) pointer to an alloca.  If so, we can
-    // round up the alloca if needed.
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) {
-      unsigned AIAlign = AI->getAlignment();
-      
-      // If the alloca is known at least aligned as much as the byval, we can do
-      // this optimization.
-      if (AIAlign >= ByValAlignment)
-        return Arg;
-      
-      // If the alloca has a specified alignment that is less than the byval,
-      // then we can safely bump it up.
-      if (AIAlign) {
-        AI->setAlignment(ByValAlignment);
-        return Arg;
-      }
-      
-      // If the alignment has an unspecified alignment, then we can only modify
-      // it if we have TD information.  Doing so without TD info could end up
-      // with us rounding the alignment *down* accidentally, which is badness.
-      if (IFI.TD) {
-        AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy));
-        AI->setAlignment(AIAlign);
-        return Arg;
-      }
-    }
+    // If the pointer is already known to be sufficiently aligned, or if we can
+    // round it up to a larger alignment, then we don't need a temporary.
+    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+                                   IFI.TD) >= ByValAlignment)
+      return Arg;
     
-    // Otherwise, we have to make a memcpy to get a safe alignment, pretty lame.
+    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
+    // for code quality, but rarely happens and is required for correctness.
   }
   
   LLVMContext &Context = Arg->getContext();
-- 
cgit v1.1