3 files changed, 227 insertions, 113 deletions
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 1b51255..7afca82 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -45,6 +46,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
@@ -56,56 +58,13 @@ STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
 
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
-  // FIXME: If the memory unit is of pointer or integer type, we can permit
-  // assignments to subsections of the memory unit.
-
-  // Only allow direct and non-volatile loads and stores...
-  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE; ++UI) { // Loop over all of the uses of the alloca
-    const User *U = *UI;
-    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      // Note that atomic loads can be transformed; atomic semantics do
-      // not have any meaning for a local alloca.
-      if (LI->isVolatile())
-        return false;
-    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      if (SI->getOperand(0) == AI)
-        return false; // Don't allow a store OF the AI, only INTO the AI.
-      // Note that atomic stores can be transformed; atomic semantics do
-      // not have any meaning for a local alloca.
-      if (SI->isVolatile())
-        return false;
-    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-          II->getIntrinsicID() != Intrinsic::lifetime_end)
-        return false;
-    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
-        return false;
-      if (!onlyUsedByLifetimeMarkers(BCI))
-        return false;
-    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
-      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
-        return false;
-      if (!GEPI->hasAllZeroIndices())
-        return false;
-      if (!onlyUsedByLifetimeMarkers(GEPI))
-        return false;
-    } else {
-      return false;
-    }
-  }
-
-  return true;
-}
-
 namespace {
 
-struct AllocaInfo {
+struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
   SmallVector<BasicBlock *, 32> DefiningBlocks;
   SmallVector<BasicBlock *, 32> UsingBlocks;
 
+  Type *AllocaTy;
   StoreInst *OnlyStore;
   BasicBlock *OnlyBlock;
   bool OnlyUsedInOneBlock;
@@ -116,6 +75,7 @@ struct AllocaInfo {
   void clear() {
     DefiningBlocks.clear();
     UsingBlocks.clear();
+    AllocaTy = 0;
     OnlyStore = 0;
     OnlyBlock = 0;
     OnlyUsedInOneBlock = true;
@@ -125,39 +85,107 @@ struct AllocaInfo {
 
   /// Scan the uses of the specified alloca, filling in the AllocaInfo used
   /// by the rest of the pass to reason about the uses of this alloca.
-  void AnalyzeAlloca(AllocaInst *AI) {
+  bool analyzeAlloca(AllocaInst &AI) {
     clear();
 
-    // As we scan the uses of the alloca instruction, keep track of stores,
-    // and decide whether all of the loads and stores to the alloca are within
-    // the same basic block.
-    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
-         UI != E;) {
-      Instruction *User = cast<Instruction>(*UI++);
-
-      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-        // Remember the basic blocks which define new values for the alloca
-        DefiningBlocks.push_back(SI->getParent());
-        AllocaPointerVal = SI->getOperand(0);
-        OnlyStore = SI;
-      } else {
-        LoadInst *LI = cast<LoadInst>(User);
-        // Otherwise it must be a load instruction, keep track of variable
-        // reads.
-        UsingBlocks.push_back(LI->getParent());
-        AllocaPointerVal = LI;
-      }
+    AllocaTy = AI.getAllocatedType();
+    enqueueUsers(AI);
+
+    // Walk queued up uses in the worklist to handle nested uses.
+    while (!UseWorklist.empty()) {
+      U = UseWorklist.pop_back_val();
+      Instruction &I = *cast<Instruction>(U->getUser());
+      if (!visit(I))
+        return false; // Propagate failure to promote up.
 
       if (OnlyUsedInOneBlock) {
         if (OnlyBlock == 0)
-          OnlyBlock = User->getParent();
-        else if (OnlyBlock != User->getParent())
+          OnlyBlock = I.getParent();
+        else if (OnlyBlock != I.getParent())
           OnlyUsedInOneBlock = false;
       }
     }
 
-    DbgDeclare = FindAllocaDbgDeclare(AI);
+    DbgDeclare = FindAllocaDbgDeclare(&AI);
+    return true;
+  }
+
+private:
+  // Befriend the base class so it can call through private visitor methods.
+  friend class InstVisitor<AllocaInfo, bool>;
+
+  /// \brief A use pointer that is non-null when visiting uses.
+  Use *U;
+
+  /// \brief A worklist for recursively visiting all uses of an alloca.
+  SmallVector<Use *, 8> UseWorklist;
+
+  /// \brief A set for preventing cyclic visitation.
+  SmallPtrSet<Use *, 8> VisitedUses;
+
+  void enqueueUsers(Instruction &I) {
+    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+         ++UI)
+      if (VisitedUses.insert(&UI.getUse()))
+        UseWorklist.push_back(&UI.getUse());
+  }
+
+  bool visitLoadInst(LoadInst &LI) {
+    if (LI.isVolatile() || LI.getType() != AllocaTy)
+      return false;
+
+    // Keep track of variable reads.
+    UsingBlocks.push_back(LI.getParent());
+    AllocaPointerVal = &LI;
+    return true;
+  }
+
+  bool visitStoreInst(StoreInst &SI) {
+    if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
+        SI.getValueOperand()->getType() != AllocaTy)
+      return false;
+
+    // Remember the basic blocks which define new values for the alloca
+    DefiningBlocks.push_back(SI.getParent());
+    AllocaPointerVal = SI.getOperand(0);
+    OnlyStore = &SI;
+    return true;
+  }
+
+  bool visitBitCastInst(BitCastInst &BC) {
+    enqueueUsers(BC);
+    return true;
   }
+
+  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    if (GEPI.use_empty())
+      return true;
+
+    enqueueUsers(GEPI);
+
+    return GEPI.hasAllZeroIndices();
+  }
+
+  // We can promote through debug info intrinsics as they don't alter the
+  // value stored in memory.
+  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { return true; }
+
+  bool visitIntrinsicInst(IntrinsicInst &II) {
+    switch (II.getIntrinsicID()) {
+    default:
+      return false;
+
+      // Lifetime intrinsics don't preclude promoting the memory to a register.
+      // FIXME: We should use these to promote to undef when outside of a valid
+      // lifetime.
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      return true;
+    }
+  }
+
+  // The fallback is that the alloca cannot be promoted.
+  bool visitInstruction(Instruction &I) { return false; }
 };
 
 // Data package used by RenamePass()
@@ -316,24 +344,56 @@ private:
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
+  // FIXME: This function isn't actually specific to lifetime instrinsics. It
+  // also is redundant with the actual analysis of the loads and stores and
+  // should be folded into that.
+
+  SmallVector<Instruction *, 8> Worklist;
+  SmallPtrSet<Instruction *, 8> Visited;
+  SmallVector<Instruction *, 8> Dead;
+  Worklist.push_back(AI);
+
+  do {
+    Instruction *I = Worklist.pop_back_val();
 
-  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE;) {
-    Instruction *I = cast<Instruction>(*UI);
-    ++UI;
-    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+    if (I->use_empty()) {
+      Dead.push_back(I);
       continue;
+    }
 
-    if (!I->getType()->isVoidTy()) {
-      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
-      // Follow the use/def chain to erase them now instead of leaving it for
-      // dead code elimination later.
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE;) {
-        Instruction *Inst = cast<Instruction>(*UI);
-        ++UI;
-        Inst->eraseFromParent();
-      }
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+         UI != UE; ++UI)
+      if (!isa<LoadInst>(*UI) && !isa<StoreInst>(*UI) &&
+          Visited.insert(cast<Instruction>(*UI)))
+        Worklist.push_back(cast<Instruction>(*UI));
+  } while (!Worklist.empty());
+
+  while (!Dead.empty()) {
+    Instruction *I = Dead.pop_back_val();
+
+    // Don't delete the alloca itself.
+    if (I == AI)
+      continue;
+
+    // Note that we open code the deletion algorithm here because we know
+    // apriori that all of the instructions using an alloca that reaches here
+    // are trivially dead when their use list becomes empty (The only risk are
+    // lifetime markers which we specifically want to nuke). By coding it here
+    // we can skip the triviality test and be more efficient.
+    //
+    // Null out all of the instruction's operands to see if any operand becomes
+    // dead as we go.
+    for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
+         ++OI) {
+      Instruction *Op = dyn_cast<Instruction>(*OI);
+      if (!Op)
+        continue;
+
+      OI->set(0);
+      if (!Op->use_empty())
+        continue;
+
+      Dead.push_back(Op);
     }
     I->eraseFromParent();
   }
@@ -540,7 +600,7 @@ void PromoteMem2Reg::run() {
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
 
-    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
+    //assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
     assert(AI->getParent()->getParent() == &F &&
            "All allocas should be in the same function, which is same as DF!");
 
@@ -560,7 +620,9 @@ void PromoteMem2Reg::run() {
 
     // Calculate the set of read and write-locations for each alloca.  This is
     // analogous to finding the 'uses' and 'definitions' of each variable.
-    Info.AnalyzeAlloca(AI);
+    bool Good = Info.analyzeAlloca(*AI);
+    (void)Good;
+    assert(Good && "Cannot promote non-promotable alloca!");
 
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
@@ -1087,8 +1149,16 @@ NextIteration:
   goto NextIteration;
 }
 
-void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
-                           AliasSetTracker *AST) {
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+  // We cast away constness because we re-use the non-const analysis that the
+  // actual promotion routine uses. While it is non-const, it doesn't actually
+  // mutate anything at this phase, and we discard the non-const results that
+  // promotion uses to mutate the alloca.
+  return AllocaInfo().analyzeAlloca(*const_cast<AllocaInst *>(AI));
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas,
+                           DominatorTree &DT, AliasSetTracker *AST) {
   // If there is nothing to do, bail out...
   if (Allocas.empty())
     return;
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
deleted file mode 100644
index 5e4f9bf..0000000
--- a/test/Transforms/Mem2Reg/ignore-lifetime.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
-
-declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
-declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
-
-define void @test1() {
-; CHECK: test1
-; CHECK-NOT: alloca
-  %A = alloca i32
-  %B = bitcast i32* %A to i8*
-  call void @llvm.lifetime.start(i64 2, i8* %B)
-  store i32 1, i32* %A
-  call void @llvm.lifetime.end(i64 2, i8* %B)
-  ret void
-}
-
-define void @test2() {
-; CHECK: test2
-; CHECK-NOT: alloca
-  %A = alloca {i8, i16}
-  %B = getelementptr {i8, i16}* %A, i32 0, i32 0
-  call void @llvm.lifetime.start(i64 2, i8* %B)
-  store {i8, i16} zeroinitializer, {i8, i16}* %A
-  call void @llvm.lifetime.end(i64 2, i8* %B)
-  ret void
-}
diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll
new file mode 100644
index 0000000..b08b1f1
--- /dev/null
+++ b/test/Transforms/Mem2Reg/use-analysis.ll
@@ -0,0 +1,70 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; Ensure we can look through a bitcast to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store i32 1, i32* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
+
+define void @test2() {
+; Ensure we can look through a GEP to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+  %A = alloca {i8, i16}
+  %B = getelementptr {i8, i16}* %A, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store {i8, i16} zeroinitializer, {i8, i16}* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+;
+; Check that we recursively walk the uses of the alloca and thus can see
+; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime
+; markers.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  %b = bitcast i32* %a to i8*
+  %b2 = getelementptr inbounds i8* %b, i32 0
+  %b3 = getelementptr inbounds i8* %b2, i32 0
+  call void @llvm.lifetime.start(i64 -1, i8* %b3)
+; CHECK-NOT: call void @llvm.lifetime.start
+
+  store i32 %x, i32* %a
+; CHECK-NOT: store
+
+  %dead = bitcast i32* %a to i4096*
+  %dead1 = bitcast i4096* %dead to i42*
+  %dead2 = getelementptr inbounds i32* %a, i32 %x
+; CHECK-NOT: bitcast
+; CHECK-NOT: getelementptr
+
+  %ret = load i32* %a
+; CHECK-NOT: load
+
+  ret i32 %ret
+; CHECK: ret i32 %x
+}