2 files changed, 51 insertions, 15 deletions
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a83c4e6..021f263 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -498,13 +498,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
         ++OldI;
       }
     }
-    // NOTE: We cannot eliminate single entry phi nodes here, because of
-    // VMap.  Single entry phi nodes can have multiple VMap entries
-    // pointing at them.  Thus, deleting one would require scanning the VMap
-    // to update any entries in it that would require that.  This would be
-    // really slow.
   }
-  
+
+  // Make a second pass over the PHINodes now that all of them have been
+  // remapped into the new function, simplifying the PHINode and performing any
+  // recursive simplifications exposed. This will transparently update the
+  // TrackingVH in the VMap. Notably, we rely on that so that if we coalesce
+  // two PHINodes, the iteration over the old PHIs remains valid, and the
+  // mapping will just map us to the new node (which may not even be a PHI
+  // node).
+  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+      recursivelySimplifyInstruction(PN, TD);
+
   // Now that the inlined function body has been fully constructed, go through
   // and zap unconditional fall-through branches.  This happen all the time when
   // specializing code: code specialization turns conditional branches into
@@ -514,15 +520,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
     if (!BI || BI->isConditional()) { ++I; continue; }
     
-    // Note that we can't eliminate uncond branches if the destination has
-    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
-    // require scanning the VMap to update any entries that point to the phi
-    // node.
     BasicBlock *Dest = BI->getSuccessor(0);
-    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+    if (!Dest->getSinglePredecessor()) {
       ++I; continue;
     }
-    
+
+    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+    // above should have zapped all of them..
+    assert(!isa<PHINode>(Dest->begin()));
+
     // We know all single-entry PHI nodes in the inlined function have been
     // removed, so we just need to splice the blocks.
     BI->eraseFromParent();
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 7583ddd..cf57cf0 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -74,7 +74,7 @@ entry:
 
 declare void @f(i32 %x)
 
-define void @inner2(i32 %x, i32 %y, i32 %z) {
+define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) {
 entry:
   %cmp1 = icmp ne i32 %x, 0
   br i1 %cmp1, label %then1, label %end1
@@ -102,17 +102,47 @@ then3:
   br label %end3
 
 end3:
+  br i1 %b, label %end3.1, label %end3.2
+
+end3.1:
+  %x3.1 = or i32 %x, 10
+  br label %end3.3
+
+end3.2:
+  %x3.2 = or i32 %x, 10
+  br label %end3.3
+
+end3.3:
+  %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ]
+  %cmp4 = icmp slt i32 %x3.3, 1
+  br i1 %cmp4, label %then4, label %end4
+
+then4:
+  call void @f(i32 %x3.3)
+  br label %end4
+
+end4:
   ret void
 }
 
-define void @outer2(i32 %z) {
+define void @outer2(i32 %z, i1 %b) {
 ; Ensure that after inlining, none of the blocks with a call to @f actually
 ; make it through inlining.
 ; CHECK: define void @outer2
 ; CHECK-NOT: call
+;
+; FIXME: Currently, we aren't smart enough to delete the last dead basic block.
+; However, we do make the condition a constant. Check that at least until we can
+; start removing the block itself.
+; CHECK: br i1 false, label %[[LABEL:[a-z0-9_.]+]],
+; CHECK-NOT: call
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: call void @f(i32 10)
+; CHECK-NOT: call
+;
 ; CHECK: ret void
 
 entry:
-  call void @inner2(i32 0, i32 -1, i32 %z)
+  call void @inner2(i32 0, i32 -1, i32 %z, i1 %b)
   ret void
 }