23 files changed, 2045 insertions, 1034 deletions
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index df87208..9bf9ef3 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -29,6 +29,7 @@ transforms_utils_SRC_FILES := \
   SimplifyIndVar.cpp \
   SimplifyInstructions.cpp \
   SimplifyLibCalls.cpp \
+  SpecialCaseList.cpp \
   UnifyFunctionExitNodes.cpp \
   Utils.cpp \
   ValueMapper.cpp
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index ba99d2e..e17a416 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -170,7 +171,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
       if (DomTreeNode *DTN = DT->getNode(BB)) {
         DomTreeNode *PredDTN = DT->getNode(PredBB);
         SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
-        for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+        for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
              DE = Children.end(); DI != DE; ++DI)
           DT->changeImmediateDominator(*DI, PredDTN);
 
@@ -235,22 +236,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
   ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
 }
 
-/// GetSuccessorNumber - Search for the specified successor of basic block BB
-/// and return its position in the terminator instruction's list of
-/// successors.  It is an error to call this with a block that is not a
-/// successor.
-unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
-  TerminatorInst *Term = BB->getTerminator();
-#ifndef NDEBUG
-  unsigned e = Term->getNumSuccessors();
-#endif
-  for (unsigned i = 0; ; ++i) {
-    assert(i != e && "Didn't find edge?");
-    if (Term->getSuccessor(i) == Succ)
-      return i;
-  }
-}
-
 /// SplitEdge -  Split the edge connecting specified block. Pass P must
 /// not be NULL.
 BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
@@ -598,52 +583,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
   }
 }
 
-/// FindFunctionBackedges - Analyze the specified function to find all of the
-/// loop backedges in the function and return them.  This is a relatively cheap
-/// (compared to computing dominators and loop info) analysis.
-///
-/// The output is added to Result, as pairs of <from,to> edge info.
-void llvm::FindFunctionBackedges(const Function &F,
-     SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
-  const BasicBlock *BB = &F.getEntryBlock();
-  if (succ_begin(BB) == succ_end(BB))
-    return;
-
-  SmallPtrSet<const BasicBlock*, 8> Visited;
-  SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
-  SmallPtrSet<const BasicBlock*, 8> InStack;
-
-  Visited.insert(BB);
-  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
-  InStack.insert(BB);
-  do {
-    std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
-    const BasicBlock *ParentBB = Top.first;
-    succ_const_iterator &I = Top.second;
-
-    bool FoundNew = false;
-    while (I != succ_end(ParentBB)) {
-      BB = *I++;
-      if (Visited.insert(BB)) {
-        FoundNew = true;
-        break;
-      }
-      // Successor is in VisitStack, it's a back edge.
-      if (InStack.count(BB))
-        Result.push_back(std::make_pair(ParentBB, BB));
-    }
-
-    if (FoundNew) {
-      // Go down one level if there is a unvisited successor.
-      InStack.insert(BB);
-      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
-    } else {
-      // Go up one level.
-      InStack.erase(VisitStack.pop_back_val().first);
-    }
-  } while (!VisitStack.empty());
-}
-
 /// FoldReturnIntoUncondBranch - This method duplicates the specified return
 /// instruction into a predecessor which ends in an unconditional branch. If
 /// the return instruction returns a value defined by a PHI, propagate the
@@ -726,3 +665,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
   return CheckTerm;
 }
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors,
+/// check to see if the merge at this block is due
+/// to an "if condition".  If so, return the boolean condition that determines
+/// which entry into BB will be taken.  Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+                             BasicBlock *&IfFalse) {
+  PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+  BasicBlock *Pred1 = NULL;
+  BasicBlock *Pred2 = NULL;
+
+  if (SomePHI) {
+    if (SomePHI->getNumIncomingValues() != 2)
+      return NULL;
+    Pred1 = SomePHI->getIncomingBlock(0);
+    Pred2 = SomePHI->getIncomingBlock(1);
+  } else {
+    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+    if (PI == PE) // No predecessor
+      return NULL;
+    Pred1 = *PI++;
+    if (PI == PE) // Only one predecessor
+      return NULL;
+    Pred2 = *PI++;
+    if (PI != PE) // More than two predecessors
+      return NULL;
+  }
+
+  // We can only handle branches.  Other control flow will be lowered to
+  // branches if possible anyway.
+  BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+  BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+  if (Pred1Br == 0 || Pred2Br == 0)
+    return 0;
+
+  // Eliminate code duplication by ensuring that Pred1Br is conditional if
+  // either are.
+  if (Pred2Br->isConditional()) {
+    // If both branches are conditional, we don't have an "if statement".  In
+    // reality, we could transform this case, but since the condition will be
+    // required anyway, we stand no chance of eliminating it, so the xform is
+    // probably not profitable.
+    if (Pred1Br->isConditional())
+      return 0;
+
+    std::swap(Pred1, Pred2);
+    std::swap(Pred1Br, Pred2Br);
+  }
+
+  if (Pred1Br->isConditional()) {
+    // The only thing we have to watch out for here is to make sure that Pred2
+    // doesn't have incoming edges from other blocks.  If it does, the condition
+    // doesn't dominate BB.
+    if (Pred2->getSinglePredecessor() == 0)
+      return 0;
+
+    // If we found a conditional branch predecessor, make sure that it branches
+    // to BB and Pred2.  If it doesn't, this isn't an "if statement".
+    if (Pred1Br->getSuccessor(0) == BB &&
+        Pred1Br->getSuccessor(1) == Pred2) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+               Pred1Br->getSuccessor(1) == BB) {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    } else {
+      // We know that one arm of the conditional goes to BB, so the other must
+      // go somewhere unrelated, and this must not be an "if statement".
+      return 0;
+    }
+
+    return Pred1Br->getCondition();
+  }
+
+  // Ok, if we got here, both predecessors end with an unconditional branch to
+  // BB.  Don't panic!  If both blocks only have a single (identical)
+  // predecessor, and THAT is a conditional branch, then we're all ok!
+  BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+  if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+    return 0;
+
+  // Otherwise, if this is a conditional branch, then we can use it!
+  BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+  if (BI == 0) return 0;
+
+  assert(BI->isConditional() && "Two successors but not conditional?");
+  if (BI->getSuccessor(0) == Pred1) {
+    IfTrue = Pred1;
+    IfFalse = Pred2;
+  } else {
+    IfTrue = Pred2;
+    IfFalse = Pred1;
+  }
+  return BI->getCondition();
+}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8513772..8f3ff96 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileInfo.h"
@@ -84,39 +85,6 @@ bool BreakCriticalEdges::runOnFunction(Function &F) {
 //    Implementation of the external critical edge manipulation functions
 //===----------------------------------------------------------------------===//
 
-// isCriticalEdge - Return true if the specified edge is a critical edge.
-// Critical edges are edges from a block with multiple successors to a block
-// with multiple predecessors.
-//
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
-                          bool AllowIdenticalEdges) {
-  assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
-  if (TI->getNumSuccessors() == 1) return false;
-
-  const BasicBlock *Dest = TI->getSuccessor(SuccNum);
-  const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
-
-  // If there is more than one predecessor, this is a critical edge...
-  assert(I != E && "No preds, but we have an edge to the block?");
-  const BasicBlock *FirstPred = *I;
-  ++I;        // Skip one edge due to the incoming arc from TI.
-  if (!AllowIdenticalEdges)
-    return I != E;
-
-  // If AllowIdenticalEdges is true, then we allow this edge to be considered
-  // non-critical iff all preds come from TI's block.
-  while (I != E) {
-    const BasicBlock *P = *I;
-    if (P != FirstPred)
-      return true;
-    // Note: leave this as is until no one ever compiles with either gcc 4.0.1
-    // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
-    E = pred_end(P);
-    ++I;
-  }
-  return false;
-}
-
 /// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
 /// may require new PHIs in the new exit block. This function inserts the
 /// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index b71628b..3648fd6 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -25,9 +25,11 @@ add_llvm_library(LLVMTransformUtils
   PromoteMemoryToRegister.cpp
   SSAUpdater.cpp
   SimplifyCFG.cpp
+  FlattenCFG.cpp
   SimplifyIndVar.cpp
   SimplifyInstructions.cpp
   SimplifyLibCalls.cpp
+  SpecialCaseList.cpp
   UnifyFunctionExitNodes.cpp
   Utils.cpp
   ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index be8d39e..d105f5e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              bool ModuleLevelChanges,
                              SmallVectorImpl<ReturnInst*> &Returns,
                              const char *NameSuffix, ClonedCodeInfo *CodeInfo,
-                             ValueMapTypeRemapper *TypeMapper) {
+                             ValueMapTypeRemapper *TypeMapper,
+                             ValueMaterializer *Materializer) {
   assert(NameSuffix && "NameSuffix cannot be null!");
 
 #ifndef NDEBUG
@@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
       RemapInstruction(II, VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
-                       TypeMapper);
+                       TypeMapper, Materializer);
 }
 
 /// CloneFunction - Return a copy of the specified function, but without
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f7c659f..82013f9 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -277,8 +277,8 @@ void CodeExtractor::splitReturnBlocks() {
 
         DomTreeNode *NewNode = DT->addNewBlock(New, *I);
 
-        for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
-               E = Children.end(); I != E; ++I) 
+        for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(),
+               E = Children.end(); I != E; ++I)
           DT->changeImmediateDominator(*I, NewNode);
       }
     }
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index db525cd..0723b35 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 0000000..9cbe15d
--- /dev/null
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,488 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flattencfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+namespace {
+class FlattenCFGOpt {
+  AliasAnalysis *AA;
+  /// \brief Use parallel-and or parallel-or to generate conditions for
+  /// conditional branches.
+  bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+  /// \brief If \param BB is the merge block of an if-region, attempt to merge
+  /// the if-region with an adjacent if-region upstream if two if-regions
+  /// contain identical instructions.
+  bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+  /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+  /// are from two if-regions whose entry blocks are \p Head1 and \p
+  /// Head2.  \returns true if \p Block1 and \p Block2 contain identical
+  /// instructions, and have no memory reference alias with \p Head2.
+  /// This is used as a legality check for merging if-regions.
+  bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+                            BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+  FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+  bool run(BasicBlock *BB);
+};
+}
+
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success.
+///
+/// Before:
+///   ......
+///   %cmp10 = fcmp une float %tmp1, %tmp2
+///   br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+///   ......
+///   %cmp11 = fcmp une float %tmp3, %tmp4
+///   br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end:  // the merge block
+///   ......
+///
+/// if.then: // has two predecessors, both containing conditional branches.
+///   ......
+///   br label %if.end;
+///
+/// After:
+///  ......
+///  %cmp10 = fcmp une float %tmp1, %tmp2
+///  ......
+///  %cmp11 = fcmp une float %tmp3, %tmp4
+///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
+///  br i1 %cmp12, label %if.then, label %if.end
+///
+///  if.end:
+///    ......
+///
+///  if.then:
+///    ......
+///    br label %if.end;
+///
+///  Current implementation handles two cases.
+///  Case 1: \param BB is on the else-path.
+///
+///          BB1
+///        /     |
+///       BB2    |
+///      /   \   |
+///     BB3   \  |     where, BB1, BB2 contain conditional branches.
+///      \    |  /     BB3 contains unconditional branch.
+///       \   | /      BB4 corresponds to \param BB which is also the merge.
+///  BB => BB4
+///
+///
+///  Corresponding source code:
+///
+///  if (a == b && c == d)
+///    statement; // BB3
+///
+///  Case 2: \param BB is on the then-path.
+///
+///             BB1
+///          /      |
+///         |      BB2
+///         \    /    |  where BB1, BB2 contain conditional branches.
+///  BB =>   BB3      |  BB3 contains unconditional branch and corresponds
+///           \     /    to \param BB.  BB4 is the merge.
+///             BB4
+///
+///  Corresponding source code:
+///
+///  if (a == b || c == d)
+///    statement;  // BB3
+///
+///  In both cases,  \param BB is the common successor of conditional branches.
+///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
+///  as its predecessors.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+                                         Pass *P) {
+  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+  if (PHI)
+    return false; // For simplicity, avoid cases containing PHI nodes.
+
+  BasicBlock *LastCondBlock = NULL;
+  BasicBlock *FirstCondBlock = NULL;
+  BasicBlock *UnCondBlock = NULL;
+  int Idx = -1;
+
+  // Check predecessors of \param BB.
+  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+       PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+    // All predecessors should terminate with a branch.
+    if (!PBI)
+      return false;
+
+    BasicBlock *PP = Pred->getSinglePredecessor();
+
+    if (PBI->isUnconditional()) {
+      // Case 1: Pred (BB3) is an unconditional block, it should
+      // have a single predecessor (BB2) that is also a predecessor
+      // of \param BB (BB4) and should not have address-taken.
+      // There should exist only one such unconditional
+      // branch among the predecessors.
+      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+          Pred->hasAddressTaken())
+        return false;
+
+      UnCondBlock = Pred;
+      continue;
+    }
+
+    // Only conditional branches are allowed beyond this point.
+    assert(PBI->isConditional());
+
+    // Condition's unique use should be the branch instruction.
+    Value *PC = PBI->getCondition();
+    if (!PC || !PC->hasOneUse())
+      return false;
+
+    if (PP && Preds.count(PP)) {
+      // These are internal condition blocks to be merged from, e.g.,
+      // BB2 in both cases.
+      // Should not be address-taken.
+      if (Pred->hasAddressTaken())
+        return false;
+
+      // Instructions in the internal condition blocks should be safe
+      // to hoist up.
+      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
+        Instruction *CI = BI++;
+        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+          return false;
+      }
+    } else {
+      // This is the condition block to be merged into, e.g. BB1 in
+      // both cases.
+      if (FirstCondBlock)
+        return false;
+      FirstCondBlock = Pred;
+    }
+
+    // Find whether BB is uniformly on the true (or false) path
+    // for all of its predecessors.
+    BasicBlock *PS1 = PBI->getSuccessor(0);
+    BasicBlock *PS2 = PBI->getSuccessor(1);
+    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+    int CIdx = (PS1 == BB) ? 0 : 1;
+
+    if (Idx == -1)
+      Idx = CIdx;
+    else if (CIdx != Idx)
+      return false;
+
+    // PS is the successor which is not BB. Check successors to identify
+    // the last conditional branch.
+    if (Preds.count(PS) == 0) {
+      // Case 2.
+      LastCondBlock = Pred;
+    } else {
+      // Case 1
+      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+      if (BPS && BPS->isUnconditional()) {
+        // Case 1: PS(BB3) should be an unconditional branch.
+        LastCondBlock = Pred;
+      }
+    }
+  }
+
+  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+    return false;
+
+  TerminatorInst *TBB = LastCondBlock->getTerminator();
+  BasicBlock *PS1 = TBB->getSuccessor(0);
+  BasicBlock *PS2 = TBB->getSuccessor(1);
+  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
+  // attempt branch inversion.
+  if (!PBI1 || !PBI1->isUnconditional() ||
+      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+    // Check whether PS2 jumps into PS1.
+    if (!PBI2 || !PBI2->isUnconditional() ||
+        (PS2->getTerminator()->getSuccessor(0) != PS1))
+      return false;
+
+    // Do branch inversion.
+    BasicBlock *CurrBlock = LastCondBlock;
+    bool EverChanged = false;
+    while (1) {
+      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
+      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+      CmpInst::Predicate Predicate = CI->getPredicate();
+      // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_ueq
+      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
+        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+        BI->swapSuccessors();
+        EverChanged = true;
+      }
+      if (CurrBlock == FirstCondBlock)
+        break;
+      CurrBlock = CurrBlock->getSinglePredecessor();
+    }
+    return EverChanged;
+  }
+
+  // PS1 must have an unconditional branch.
+  if (!PBI1 || !PBI1->isUnconditional())
+    return false;
+
+  // PS2 should not contain PHI node.
+  PHI = dyn_cast<PHINode>(PS2->begin());
+  if (PHI)
+    return false;
+
+  // Do the transformation.
+  BasicBlock *CB;
+  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+  bool Iteration = true;
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  Value *PC = PBI->getCondition();
+
+  do {
+    CB = PBI->getSuccessor(1 - Idx);
+    // Delete the conditional branch.
+    FirstCondBlock->getInstList().pop_back();
+    FirstCondBlock->getInstList()
+        .splice(FirstCondBlock->end(), CB->getInstList());
+    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+    Value *CC = PBI->getCondition();
+    // Merge conditions.
+    Builder.SetInsertPoint(PBI);
+    Value *NC;
+    if (Idx == 0)
+      // Case 2, use parallel or.
+      NC = Builder.CreateOr(PC, CC);
+    else
+      // Case 1, use parallel and.
+      NC = Builder.CreateAnd(PC, CC);
+
+    PBI->replaceUsesOfWith(CC, NC);
+    PC = NC;
+    if (CB == LastCondBlock)
+      Iteration = false;
+    // Remove internal conditional branches.
+    CB->dropAllReferences();
+    // Make CB unreachable and let downstream code delete the block.
+    new UnreachableInst(CB->getContext(), CB);
+  } while (Iteration);
+
+  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+  return true;
+}
+
+/// Compare blocks from two if-regions, where \param Head1 is the entry of the
+/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
+/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
+/// in the 2nd if-region to compare.  \returns true if \param Block1 and \param
+/// Block2 have identical instructions and do not have memory reference alias
+/// with \param Head2.
+///
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+                                         BasicBlock *Block1,
+                                         BasicBlock *Block2) {
+  TerminatorInst *PTI2 = Head2->getTerminator();
+  Instruction *PBI2 = Head2->begin();
+
+  bool eq1 = (Block1 == Head1);
+  bool eq2 = (Block2 == Head2);
+  if (eq1 || eq2) {
+    // An empty then-path or else-path.
+    return (eq1 == eq2);
+  }
+
+  // Check whether instructions in Block1 and Block2 are identical
+  // and do not alias with instructions in Head2.
+  BasicBlock::iterator iter1 = Block1->begin();
+  BasicBlock::iterator end1 = Block1->getTerminator();
+  BasicBlock::iterator iter2 = Block2->begin();
+  BasicBlock::iterator end2 = Block2->getTerminator();
+
+  while (1) {
+    if (iter1 == end1) {
+      if (iter2 != end2)
+        return false;
+      break;
+    }
+
+    if (!iter1->isIdenticalTo(iter2))
+      return false;
+
+    // Illegal to remove instructions with side effects except
+    // non-volatile stores.
+    if (iter1->mayHaveSideEffects()) {
+      Instruction *CurI = &*iter1;
+      StoreInst *SI = dyn_cast<StoreInst>(CurI);
+      if (!SI || SI->isVolatile())
+        return false;
+    }
+
+    // For simplicity and speed, bail out on any instruction that may read
+    // from memory, so no data dependency check on reads is needed.
+    if (iter1->mayReadFromMemory())
+      return false;
+
+    if (iter1->mayWriteToMemory()) {
+      for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+        if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+          // Check alias with Head2.
+          if (!AA || AA->alias(iter1, BI))
+            return false;
+        }
+      }
+    }
+    ++iter1;
+    ++iter2;
+  }
+
+  return true;
+}
+
+/// Check whether \param BB is the merge block of an if-region.  If yes, check
+/// whether there exists an adjacent if-region upstream, the two if-regions
+/// contain identical instructions and can be legally merged.  \returns true if
+/// the two if-regions are merged.
+///
+/// From:
+/// if (a)
+///   statement;
+/// if (b)
+///   statement;
+///
+/// To:
+/// if (a || b)
+///   statement;
+///
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
+                                  Pass *P) {
+  BasicBlock *IfTrue2, *IfFalse2;
+  Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+  Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+  if (!CInst2)
+    return false;
+
+  BasicBlock *SecondEntryBlock = CInst2->getParent();
+  if (SecondEntryBlock->hasAddressTaken())
+    return false;
+
+  BasicBlock *IfTrue1, *IfFalse1;
+  Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+  Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+  if (!CInst1)
+    return false;
+
+  BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+  // Either then-path or else-path should be empty.
+  if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+    return false;
+  if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+    return false;
+
+  TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+  Instruction *PBI2 = SecondEntryBlock->begin();
+
+  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
+                            IfTrue2))
+    return false;
+
+  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
+                            IfFalse2))
+    return false;
+
+  // Check whether \param SecondEntryBlock has side-effect and is safe to
+  // speculate.
+  for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+    Instruction *CI = BI;
+    if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+        !isSafeToSpeculativelyExecute(CI))
+      return false;
+  }
+
+  // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+  FirstEntryBlock->getInstList().pop_back();
+  FirstEntryBlock->getInstList()
+      .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+  BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+  Value *CC = PBI->getCondition();
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  Builder.SetInsertPoint(PBI);
+  Value *NC = Builder.CreateOr(CInst1, CC);
+  PBI->replaceUsesOfWith(CC, NC);
+  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+  // Remove IfTrue1
+  if (IfTrue1 != FirstEntryBlock) {
+    IfTrue1->dropAllReferences();
+    IfTrue1->eraseFromParent();
+  }
+
+  // Remove IfFalse1
+  if (IfFalse1 != FirstEntryBlock) {
+    IfFalse1->dropAllReferences();
+    IfFalse1->eraseFromParent();
+  }
+
+  // Remove \param SecondEntryBlock
+  SecondEntryBlock->dropAllReferences();
+  SecondEntryBlock->eraseFromParent();
+  DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+  return true;
+}
+
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+  bool Changed = false;
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+  IRBuilder<> Builder(BB);
+
+  if (FlattenParallelAndOr(BB, Builder))
+    return true;
+
+  if (MergeIfRegion(BB, Builder))
+    return true;
+
+  return Changed;
+}
+
+/// FlattenCFG - This function is used to flatten a CFG.  For
+/// example, it uses parallel-and and parallel-or mode to collapse
+/// if-conditions and merge if-regions with identical statements.
+///
+bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+  return FlattenCFGOpt(AA).run(BB);
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a..dabb67b 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,8 @@ namespace {
     /// a simple branch. When there is more than one predecessor, we need to
     /// split the landing pad block after the landingpad instruction and jump
     /// to there.
-    void forwardResume(ResumeInst *RI);
+    void forwardResume(ResumeInst *RI,
+                       SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
 
     /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
     /// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
 /// block. When the landing pad block has only one predecessor, this is a simple
 /// branch. When there is more than one predecessor, we need to split the
 /// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+                               SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
   BasicBlock *Dest = getInnerResumeDest();
+  LandingPadInst *OuterLPad = getLandingPadInst();
   BasicBlock *Src = RI->getParent();
 
   BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
 
   InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
   RI->eraseFromParent();
+
+  // Append the clauses from the outer landing pad instruction into the inlined
+  // landing pad instructions.
+  for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+         E = InlinedLPads.end(); I != E; ++I) {
+    LandingPadInst *InlinedLPad = *I;
+    for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+         OuterIdx != OuterNum; ++OuterIdx)
+      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+  }
 }
 
 /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.  If the code doesn't have calls or unwinds, we know there is
-  // nothing to rewrite.
-  if (!InlinedCodeInfo.ContainsCalls) {
-    // Now that everything is happy, we have one final detail.  The PHI nodes in
-    // the exception destination block still have entries due to the original
-    // invoke instruction.  Eliminate these entries (which might even delete the
-    // PHI node) now.
-    InvokeDest->removePredecessor(II->getParent());
-    return;
-  }
-
+  // rewrite.
   InvokeInliningInfo Invoke(II);
-  
+
+  // Get all of the inlined landing pad instructions.
+  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+  for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+      InlinedLPads.insert(II->getLandingPadInst());
+
   for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
     if (InlinedCodeInfo.ContainsCalls)
       if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
         continue;
       }
 
+    // Forward any resumes that are remaining here.
     if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
-      Invoke.forwardResume(RI);
+      Invoke.forwardResume(RI, InlinedLPads);
   }
 
   // Now that everything is happy, we have one final detail.  The PHI nodes in
   // the exception destination block still have entries due to the original
-  // invoke instruction.  Eliminate these entries (which might even delete the
+  // invoke instruction. Eliminate these entries (which might even delete the
   // PHI node) now.
   InvokeDest->removePredecessor(II->getParent());
 }
@@ -748,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
     // If the call site was an invoke instruction, add a branch to the normal
     // destination.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-      BranchInst::Create(II->getNormalDest(), TheCall);
+    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+    }
 
     // If the return instruction returned a value, replace uses of the call with
     // uses of the returned value.
@@ -777,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // "starter" and "ender" blocks.  How we accomplish this depends on whether
   // this is an invoke instruction or a call instruction.
   BasicBlock *AfterCallBB;
+  BranchInst *CreatedBranchToNormalDest = NULL;
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
 
     // Add an unconditional branch to make this look like the CallInst case...
-    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
 
     // Split the basic block.  This guarantees that no PHI nodes will have to be
     // updated due to new incoming edges, and make the invoke case more
     // symmetric to the call case.
-    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                           CalledFunc->getName()+".exit");
 
   } else {  // It's a call
@@ -840,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
 
     // Add a branch to the merge points and remove return instructions.
+    DebugLoc Loc;
     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
       ReturnInst *RI = Returns[i];
-      BranchInst::Create(AfterCallBB, RI);
+      BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+      Loc = RI->getDebugLoc();
+      BI->setDebugLoc(Loc);
       RI->eraseFromParent();
     }
+    // We need to set the debug location to *somewhere* inside the
+    // inlined function. The line number may be nonsensical, but the
+    // instruction will at least be associated with the right
+    // function.
+    if (CreatedBranchToNormalDest)
+      CreatedBranchToNormalDest->setDebugLoc(Loc);
   } else if (!Returns.empty()) {
     // Otherwise, if there is exactly one return value, just replace anything
     // using the return value of the call with the computed value.
@@ -864,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                       ReturnBB->getInstList());
 
+    if (CreatedBranchToNormalDest)
+      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
     // Delete the return instruction now and empty ReturnBB now.
     Returns[0]->eraseFromParent();
     ReturnBB->eraseFromParent();
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a54ee08..08e1808 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -84,7 +84,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       BI->eraseFromParent();
       return true;
     }
-    
+
     if (Dest2 == Dest1) {       // Conditional branch to same location?
       // This branch matches something like this:
       //     br bool %cond, label %Dest, label %Dest
@@ -104,7 +104,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
     }
     return false;
   }
-  
+
   if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
     // If we are switching on a constant, we can convert the switch into a
     // single branch instruction!
@@ -188,7 +188,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
         RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
       return true;
     }
-    
+
     if (SI->getNumCases() == 1) {
       // Otherwise, we can fold this switch into a conditional branch
       // instruction if it has only one non-default destination.
@@ -231,7 +231,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       BasicBlock *TheOnlyDest = BA->getBasicBlock();
       // Insert the new branch.
       Builder.CreateBr(TheOnlyDest);
-      
+
       for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
         if (IBI->getDestination(i) == TheOnlyDest)
           TheOnlyDest = 0;
@@ -242,7 +242,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       IBI->eraseFromParent();
       if (DeleteDeadConditions)
         RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
-      
+
       // If we didn't find our destination in the IBI successor list, then we
       // have undefined behavior.  Replace the unconditional branch with an
       // 'unreachable' instruction.
@@ -250,11 +250,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
         BB->getTerminator()->eraseFromParent();
         new UnreachableInst(BB->getContext(), BB);
       }
-      
+
       return true;
     }
   }
-  
+
   return false;
 }
 
@@ -321,10 +321,10 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
     return false;
-  
+
   SmallVector<Instruction*, 16> DeadInsts;
   DeadInsts.push_back(I);
-  
+
   do {
     I = DeadInsts.pop_back_val();
 
@@ -333,9 +333,9 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
       Value *OpV = I->getOperand(i);
       I->setOperand(i, 0);
-      
+
       if (!OpV->use_empty()) continue;
-    
+
       // If the operand is an instruction that became dead as we nulled out the
       // operand, and if it is 'trivially' dead, delete it in a future loop
       // iteration.
@@ -343,7 +343,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
         if (isInstructionTriviallyDead(OpI, TLI))
           DeadInsts.push_back(OpI);
     }
-    
+
     I->eraseFromParent();
   } while (!DeadInsts.empty());
 
@@ -450,12 +450,12 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
   // This only adjusts blocks with PHI nodes.
   if (!isa<PHINode>(BB->begin()))
     return;
-  
+
   // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
   // them down.  This will leave us with single entry phi nodes and other phis
   // that can be removed.
   BB->removePredecessor(Pred, true);
-  
+
   WeakVH PhiIt = &BB->front();
   while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
     PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
@@ -486,10 +486,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
     PN->replaceAllUsesWith(NewVal);
     PN->eraseFromParent();
   }
-  
+
   BasicBlock *PredBB = DestBB->getSinglePredecessor();
   assert(PredBB && "Block doesn't have a single predecessor!");
-  
+
   // Zap anything that took the address of DestBB.  Not doing this will give the
   // address an invalid value.
   if (DestBB->hasAddressTaken()) {
@@ -500,10 +500,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
                                                      BA->getType()));
     BA->destroyConstant();
   }
-  
+
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
-  
+
   // Splice all the instructions from PredBB to DestBB.
   PredBB->getTerminator()->eraseFromParent();
   DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
@@ -525,6 +525,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   PredBB->eraseFromParent();
 }
 
+/// CanMergeValues - Return true if we can choose one of these values to use
+/// in place of the other. Note that we will always choose the non-undef
+/// value to keep.
+static bool CanMergeValues(Value *First, Value *Second) {
+  return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
+
 /// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
 /// almost-empty BB ending in an unconditional branch to Succ, into succ.
 ///
@@ -533,7 +540,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
 static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
   assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
 
-  DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " 
+  DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
         << Succ->getName() << "\n");
   // Shortcut, if there is only a single predecessor it must be BB and merging
   // is always safe
@@ -555,9 +562,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
       for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
         BasicBlock *IBB = PN->getIncomingBlock(PI);
         if (BBPreds.count(IBB) &&
-            BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
-          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " 
-                << Succ->getName() << " is conflicting with " 
+            !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
+                            PN->getIncomingValue(PI))) {
+          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+                << Succ->getName() << " is conflicting with "
                 << BBPN->getName() << " with regard to common predecessor "
                 << IBB->getName() << "\n");
           return false;
@@ -570,8 +578,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
         // one for BB, in which case this phi node will not prevent the merging
         // of the block.
         BasicBlock *IBB = PN->getIncomingBlock(PI);
-        if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
-          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " 
+        if (BBPreds.count(IBB) &&
+            !CanMergeValues(Val, PN->getIncomingValue(PI))) {
+          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
                 << Succ->getName() << " is conflicting with regard to common "
                 << "predecessor " << IBB->getName() << "\n");
           return false;
@@ -583,6 +592,139 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
   return true;
 }
 
+typedef SmallVector<BasicBlock *, 16> PredBlockVector;
+typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+
+/// \brief Determines the value to use as the phi node input for a block.
+///
+/// Select between \p OldVal and any value that we know flows from \p BB
+/// to a particular phi on the basis of which one (if either) is not
+/// undef. Update IncomingValues based on the selected value.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+                                          IncomingValueMap &IncomingValues) {
+  if (!isa<UndefValue>(OldVal)) {
+    assert((!IncomingValues.count(BB) ||
+            IncomingValues.find(BB)->second == OldVal) &&
+           "Expected OldVal to match incoming value from BB!");
+
+    IncomingValues.insert(std::make_pair(BB, OldVal));
+    return OldVal;
+  }
+
+  IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+  if (It != IncomingValues.end()) return It->second;
+
+  return OldVal;
+}
+
+/// \brief Create a map from block to value for the operands of a
+/// given phi.
+///
+/// Create a map from block to value for each non-undef value flowing
+/// into \p PN.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param [out] IncomingValues The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+                                      IncomingValueMap &IncomingValues) {
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *BB = PN->getIncomingBlock(i);
+    Value *V = PN->getIncomingValue(i);
+
+    if (!isa<UndefValue>(V))
+      IncomingValues.insert(std::make_pair(BB, V));
+  }
+}
+
+/// \brief Replace the incoming undef values to a phi with the values
+/// from a block-to-value map.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+                                    const IncomingValueMap &IncomingValues) {
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *V = PN->getIncomingValue(i);
+
+    if (!isa<UndefValue>(V)) continue;
+
+    BasicBlock *BB = PN->getIncomingBlock(i);
+    IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+    if (It == IncomingValues.end()) continue;
+
+    PN->setIncomingValue(i, It->second);
+  }
+}
+
+/// \brief Replace a value flowing from a block to a phi with
+/// potentially multiple instances of that value flowing from the
+/// block's predecessors to the phi.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+                                                const PredBlockVector &BBPreds,
+                                                PHINode *PN) {
+  Value *OldVal = PN->removeIncomingValue(BB, false);
+  assert(OldVal && "No entry in PHI for Pred BB!");
+
+  IncomingValueMap IncomingValues;
+
+  // We are merging two blocks - BB, and the block containing PN - and
+  // as a result we need to redirect edges from the predecessors of BB
+  // to go to the block containing PN, and update PN
+  // accordingly. Since we allow merging blocks in the case where the
+  // predecessor and successor blocks both share some predecessors,
+  // and where some of those common predecessors might have undef
+  // values flowing into PN, we want to rewrite those values to be
+  // consistent with the non-undef values.
+
+  gatherIncomingValuesToPhi(PN, IncomingValues);
+
+  // If this incoming value is one of the PHI nodes in BB, the new entries
+  // in the PHI node are the entries from the old PHI.
+  if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+    PHINode *OldValPN = cast<PHINode>(OldVal);
+    for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
+      // Note that, since we are merging phi nodes and BB and Succ might
+      // have common predecessors, we could end up with a phi node with
+      // identical incoming branches. This will be cleaned up later (and
+      // will trigger asserts if we try to clean it up now, without also
+      // simplifying the corresponding conditional branch).
+      BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+      Value *PredVal = OldValPN->getIncomingValue(i);
+      Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
+                                                    IncomingValues);
+
+      // And add a new incoming value for this predecessor for the
+      // newly retargeted branch.
+      PN->addIncoming(Selected, PredBB);
+    }
+  } else {
+    for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
+      // Update existing incoming values in PN for this
+      // predecessor of BB.
+      BasicBlock *PredBB = BBPreds[i];
+      Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
+                                                    IncomingValues);
+
+      // And add a new incoming value for this predecessor for the
+      // newly retargeted branch.
+      PN->addIncoming(Selected, PredBB);
+    }
+  }
+
+  replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
 /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
 /// unconditional branch, and contains no instructions other than PHI nodes,
 /// potential side-effect free intrinsics and the branch.  If possible,
@@ -595,7 +737,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   // We can't eliminate infinite loops.
   BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
   if (BB == Succ) return false;
-  
+
   // Check to see if merging these blocks would cause conflicts for any of the
   // phi nodes in BB or Succ. If not, we can safely merge.
   if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
@@ -629,39 +771,21 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   }
 
   DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
-  
+
   if (isa<PHINode>(Succ->begin())) {
     // If there is more than one pred of succ, and there are PHI nodes in
     // the successor, then we need to add incoming edges for the PHI nodes
     //
-    const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-    
+    const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+
     // Loop over all of the PHI nodes in the successor of BB.
     for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
       PHINode *PN = cast<PHINode>(I);
-      Value *OldVal = PN->removeIncomingValue(BB, false);
-      assert(OldVal && "No entry in PHI for Pred BB!");
-      
-      // If this incoming value is one of the PHI nodes in BB, the new entries
-      // in the PHI node are the entries from the old PHI.
-      if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
-        PHINode *OldValPN = cast<PHINode>(OldVal);
-        for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
-          // Note that, since we are merging phi nodes and BB and Succ might
-          // have common predecessors, we could end up with a phi node with
-          // identical incoming branches. This will be cleaned up later (and
-          // will trigger asserts if we try to clean it up now, without also
-          // simplifying the corresponding conditional branch).
-          PN->addIncoming(OldValPN->getIncomingValue(i),
-                          OldValPN->getIncomingBlock(i));
-      } else {
-        // Add an incoming value for each of the new incoming values.
-        for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
-          PN->addIncoming(OldVal, BBPreds[i]);
-      }
+
+      redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
     }
   }
-  
+
   if (Succ->getSinglePredecessor()) {
     // BB is the only predecessor of Succ, so Succ will end up with exactly
     // the same predecessors BB had.
@@ -676,7 +800,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
       PN->eraseFromParent();
     }
   }
-    
+
   // Everything that jumped to BB now goes to Succ.
   BB->replaceAllUsesWith(Succ);
   if (!Succ->hasName()) Succ->takeName(BB);
@@ -784,7 +908,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
     // the final program then it is impossible for us to reliably enforce the
     // preferred alignment.
     if (GV->isWeakForLinker()) return Align;
-    
+
     if (GV->getAlignment() >= PrefAlign)
       return GV->getAlignment();
     // We can only increase the alignment of the global if it has no alignment
@@ -804,26 +928,27 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
 /// and it is more than the alignment of the ultimate object, see if we can
 /// increase the alignment of the ultimate object, making this check succeed.
 unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
-                                          const DataLayout *TD) {
+                                          const DataLayout *DL) {
   assert(V->getType()->isPointerTy() &&
          "getOrEnforceKnownAlignment expects a pointer!");
-  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  ComputeMaskedBits(V, KnownZero, KnownOne, TD);
+  ComputeMaskedBits(V, KnownZero, KnownOne, DL);
   unsigned TrailZ = KnownZero.countTrailingOnes();
-  
-  // Avoid trouble with rediculously large TrailZ values, such as
+
+  // Avoid trouble with ridiculously large TrailZ values, such as
   // those computed from a null pointer.
   TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-  
+
   unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-  
+
   // LLVM doesn't support alignments larger than this currently.
   Align = std::min(Align, +Value::MaximumAlignment);
-  
+
   if (PrefAlign > Align)
-    Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
-    
+    Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
+
   // We don't need to make any adjustment.
   return Align;
 }
@@ -832,14 +957,36 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
 ///  Dbg Intrinsic utilities
 ///
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+  // Since we can't guarantee that the original dbg.declare intrinsic
+  // is removed by LowerDbgDeclare(), we need to make sure that we are
+  // not inserting the same dbg.value intrinsic over and over.
+  llvm::BasicBlock::InstListType::iterator PrevI(I);
+  if (PrevI != I->getParent()->getInstList().begin()) {
+    --PrevI;
+    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+      if (DVI->getValue() == I->getOperand(0) &&
+          DVI->getOffset() == 0 &&
+          DVI->getVariable() == DIVar)
+        return true;
+  }
+  return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
   DIVariable DIVar(DDI->getVariable());
-  if (!DIVar.Verify())
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
     return false;
 
+  if (LdStHasDebugValue(DIVar, SI))
+    return true;
+
   Instruction *DbgVal = NULL;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
@@ -863,18 +1010,23 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   return true;
 }
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            LoadInst *LI, DIBuilder &Builder) {
   DIVariable DIVar(DDI->getVariable());
-  if (!DIVar.Verify())
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
     return false;
 
-  Instruction *DbgVal = 
+  if (LdStHasDebugValue(DIVar, LI))
+    return true;
+
+  Instruction *DbgVal =
     Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
                                     DIVar, LI);
-  
+
   // Propagate any debug metadata from the store onto the dbg.value.
   DebugLoc LIDL = LI->getDebugLoc();
   if (!LIDL.isUnknown())
@@ -898,10 +1050,12 @@ bool llvm::LowerDbgDeclare(Function &F) {
   if (Dbgs.empty())
     return false;
 
-  for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+  for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
          E = Dbgs.end(); I != E; ++I) {
     DbgDeclareInst *DDI = *I;
     if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+      // We only remove the dbg.declare intrinsic if all uses are
+      // converted to dbg.value intrinsics.
       bool RemoveDDI = true;
       for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
            UI != E; ++UI)
@@ -936,7 +1090,9 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
   if (!DDI)
     return false;
   DIVariable DIVar(DDI->getVariable());
-  if (!DIVar.Verify())
+  assert((!DIVar || DIVar.isVariable()) &&
+         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  if (!DIVar)
     return false;
 
   // Create a copy of the original DIDescriptor for user variable, appending
@@ -985,22 +1141,17 @@ bool llvm::removeUnreachableBlocks(Function &F) {
     if (Reachable.count(I))
       continue;
 
-    // Remove the block as predecessor of all its reachable successors.
-    // Unreachable successors don't matter as they'll soon be removed, too.
     for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
       if (Reachable.count(*SI))
         (*SI)->removePredecessor(I);
+    I->dropAllReferences();
+  }
 
-    // Zap all instructions in this basic block.
-    while (!I->empty()) {
-      Instruction &Inst = I->back();
-      if (!Inst.use_empty())
-        Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
-      I->getInstList().pop_back();
-    }
+  for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+    if (!Reachable.count(I))
+      I = F.getBasicBlockList().erase(I);
+    else
+      ++I;
 
-    --I;
-    llvm::next(I)->eraseFromParent();
-  }
   return true;
 }
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 37819cc..6d5f16c 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 using namespace llvm;
 
 STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
@@ -100,16 +101,16 @@ namespace {
   private:
     bool ProcessLoop(Loop *L, LPPassManager &LPM);
     BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
-    BasicBlock *InsertPreheaderForLoop(Loop *L);
     Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
                              BasicBlock *Preheader);
     BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
-    void PlaceSplitBlockCarefully(BasicBlock *NewBB,
-                                  SmallVectorImpl<BasicBlock*> &SplitPreds,
-                                  Loop *L);
   };
 }
 
+static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+                                     SmallVectorImpl<BasicBlock*> &SplitPreds,
+                                     Loop *L);
+
 char LoopSimplify::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
                 "Canonicalize natural loops", true, false)
@@ -208,7 +209,7 @@ ReprocessLoop:
   // Does the loop already have a preheader?  If so, don't insert one.
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
-    Preheader = InsertPreheaderForLoop(L);
+    Preheader = InsertPreheaderForLoop(L, this);
     if (Preheader) {
       ++NumInserted;
       Changed = true;
@@ -367,7 +368,7 @@ ReprocessLoop:
 /// preheader, this method is called to insert one.  This method has two phases:
 /// preheader insertion and analysis updating.
 ///
-BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
   BasicBlock *Header = L->getHeader();
 
   // Compute the set of predecessors of the loop that are not in the loop.
@@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
   BasicBlock *PreheaderBB;
   if (!Header->isLandingPad()) {
     PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
-                                         this);
+                                         PP);
   } else {
     SmallVector<BasicBlock*, 2> NewBBs;
     SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
-                                ".split-lp", this, NewBBs);
+                                ".split-lp", PP, NewBBs);
     PreheaderBB = NewBBs[0];
   }
 
@@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
 // PlaceSplitBlockCarefully - If the block isn't already, move the new block to
 // right after some 'outside block' block.  This prevents the preheader from
 // being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
-                                       SmallVectorImpl<BasicBlock*> &SplitPreds,
-                                            Loop *L) {
+void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+                              SmallVectorImpl<BasicBlock*> &SplitPreds,
+                              Loop *L) {
   // Check to see if NewBB is already well placed.
   Function::iterator BBI = NewBB; --BBI;
   for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 9ec84d7..f66b54d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -61,6 +61,8 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
 
 namespace {
   class LowerInvoke : public FunctionPass {
+    const TargetMachine *TM;
+
     // Used for both models.
     Constant *AbortFn;
 
@@ -70,15 +72,12 @@ namespace {
     Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
     bool useExpensiveEHSupport;
 
-    // We peek in TLI to grab the target's jmp_buf size and alignment
-    const TargetLowering *TLI;
-
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit LowerInvoke(const TargetLowering *tli = NULL,
+    explicit LowerInvoke(const TargetMachine *TM = 0,
                          bool useExpensiveEHSupport = ExpensiveEHSupport)
-      : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
-        TLI(tli) {
+      : FunctionPass(ID), TM(TM),
+        useExpensiveEHSupport(useExpensiveEHSupport) {
       initializeLowerInvokePass(*PassRegistry::getPassRegistry());
     }
     bool doInitialization(Module &M);
@@ -108,12 +107,9 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
 char &llvm::LowerInvokePassID = LowerInvoke::ID;
 
 // Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
-  return new LowerInvoke(TLI, ExpensiveEHSupport);
-}
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
                                           bool useExpensiveEHSupport) {
-  return new LowerInvoke(TLI, useExpensiveEHSupport);
+  return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport);
 }
 
 // doInitialization - Make sure that there is a prototype for abort in the
@@ -122,6 +118,7 @@ bool LowerInvoke::doInitialization(Module &M) {
   Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
   if (useExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
+    const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
     unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
     JBSize = JBSize ? JBSize : 200;
     Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -430,6 +427,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     // Create an alloca for the incoming jump buffer ptr and the new jump buffer
     // that needs to be restored on all exits from the function.  This is an
     // alloca because the value needs to be live across invokes.
+    const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
     unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
     AllocaInst *JmpBuf =
       new AllocaInst(JBLinkTy, 0, Align,
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 61b3965..ebd7db6 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -27,6 +28,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
 namespace {
   struct PromotePass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
+
     PromotePass() : FunctionPass(ID) {
       initializePromotePassPass(*PassRegistry::getPassRegistry());
     }
@@ -62,6 +64,7 @@ bool PromotePass::runOnFunction(Function &F) {
   bool Changed  = false;
 
   DominatorTree &DT = getAnalysis<DominatorTree>();
+  const DataLayout *DL = getAnalysisIfAvailable<DataLayout>();
 
   while (1) {
     Allocas.clear();
@@ -70,12 +73,12 @@ bool PromotePass::runOnFunction(Function &F) {
     // the entry node
     for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
       if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
-        if (isAllocaPromotable(AI))
+        if (isAllocaPromotable(AI, DL))
           Allocas.push_back(AI);
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT);
+    PromoteMemToReg(Allocas, DT, DL);
     NumPromoted += Allocas.size();
     Changed = true;
   }
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 3716f58..c370453 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -53,7 +53,7 @@ namespace {
     }
 
     bool runOnModule(Module &M) {
-      static const char *metaNames[] = {
+      static const char *const metaNames[] = {
         // See http://en.wikipedia.org/wiki/Metasyntactic_variable
         "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
         "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index d090b48..ff6e6f9 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -62,3 +63,20 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
   appendToGlobalArray("llvm.global_dtors", M, F, Priority);
 }
+
+GlobalVariable *
+llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set,
+                                 bool CompilerUsed) {
+  const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
+  GlobalVariable *GV = M.getGlobalVariable(Name);
+  if (!GV || !GV->hasInitializer())
+    return GV;
+
+  const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+  for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
+    Value *Op = Init->getOperand(I);
+    GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+    Set.insert(G);
+  }
+  return GV;
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index de335ec..6910180 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,9 +27,10 @@
 
 #define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -45,6 +46,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
@@ -56,360 +58,560 @@ STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
 
-namespace llvm {
-template<>
-struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
-  typedef std::pair<BasicBlock*, unsigned> EltTy;
-  static inline EltTy getEmptyKey() {
-    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+namespace {
+
+struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
+  const DataLayout *DL;
+
+  SmallVector<BasicBlock *, 32> DefiningBlocks;
+  SmallVector<BasicBlock *, 32> UsingBlocks;
+  SmallVector<Instruction *, 8> DeadInsts;
+
+  Type *AllocaTy;
+  StoreInst *OnlyStore;
+  BasicBlock *OnlyBlock;
+  bool OnlyUsedInOneBlock;
+
+  Value *AllocaPointerVal;
+  DbgDeclareInst *DbgDeclare;
+
+  AllocaInfo(const DataLayout *DL) : DL(DL) {}
+
+  void clear() {
+    DefiningBlocks.clear();
+    UsingBlocks.clear();
+    DeadInsts.clear();
+    AllocaTy = 0;
+    OnlyStore = 0;
+    OnlyBlock = 0;
+    OnlyUsedInOneBlock = true;
+    AllocaPointerVal = 0;
+    DbgDeclare = 0;
   }
-  static inline EltTy getTombstoneKey() {
-    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+
+  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+  /// by the rest of the pass to reason about the uses of this alloca.
+  bool analyzeAlloca(AllocaInst &AI) {
+    clear();
+
+    AllocaTy = AI.getAllocatedType();
+    enqueueUsers(AI);
+
+    // Walk queued up uses in the worklist to handle nested uses.
+    while (!UseWorklist.empty()) {
+      U = UseWorklist.pop_back_val();
+      Instruction &I = *cast<Instruction>(U->getUser());
+      if (!visit(I))
+        return false; // Propagate failure to promote up.
+
+      if (OnlyUsedInOneBlock) {
+        if (OnlyBlock == 0)
+          OnlyBlock = I.getParent();
+        else if (OnlyBlock != I.getParent())
+          OnlyUsedInOneBlock = false;
+      }
+    }
+
+    DbgDeclare = FindAllocaDbgDeclare(&AI);
+    return true;
   }
-  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
-    using llvm::hash_value;
-    return static_cast<unsigned>(hash_value(Val));
+
+private:
+  // Befriend the base class so it can call through private visitor methods.
+  friend class InstVisitor<AllocaInfo, bool>;
+
+  /// \brief A use pointer that is non-null when visiting uses.
+  Use *U;
+
+  /// \brief A worklist for recursively visiting all uses of an alloca.
+  SmallVector<Use *, 8> UseWorklist;
+
+  /// \brief A set for preventing cyclic visitation.
+  SmallPtrSet<Use *, 8> VisitedUses;
+
+  void enqueueUsers(Instruction &I) {
+    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+         ++UI)
+      if (VisitedUses.insert(&UI.getUse()))
+        UseWorklist.push_back(&UI.getUse());
   }
-  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
-    return LHS == RHS;
+
+  bool visitLoadInst(LoadInst &LI) {
+    if (LI.isVolatile() || LI.getType() != AllocaTy)
+      return false;
+
+    // Keep track of variable reads.
+    UsingBlocks.push_back(LI.getParent());
+    AllocaPointerVal = &LI;
+    return true;
   }
-};
-}
 
-/// isAllocaPromotable - Return true if this alloca is legal for promotion.
-/// This is true if there are only loads and stores to the alloca.
-///
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
-  // FIXME: If the memory unit is of pointer or integer type, we can permit
-  // assignments to subsections of the memory unit.
-
-  // Only allow direct and non-volatile loads and stores...
-  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
-    const User *U = *UI;
-    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      // Note that atomic loads can be transformed; atomic semantics do
-      // not have any meaning for a local alloca.
-      if (LI->isVolatile())
-        return false;
-    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      if (SI->getOperand(0) == AI)
-        return false;   // Don't allow a store OF the AI, only INTO the AI.
-      // Note that atomic stores can be transformed; atomic semantics do
-      // not have any meaning for a local alloca.
-      if (SI->isVolatile())
-        return false;
-    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-          II->getIntrinsicID() != Intrinsic::lifetime_end)
-        return false;
-    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
-        return false;
-      if (!onlyUsedByLifetimeMarkers(BCI))
-        return false;
-    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
-      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
-        return false;
-      if (!GEPI->hasAllZeroIndices())
-        return false;
-      if (!onlyUsedByLifetimeMarkers(GEPI))
-        return false;
-    } else {
+  bool visitStoreInst(StoreInst &SI) {
+    if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
+        SI.getValueOperand()->getType() != AllocaTy)
       return false;
+
+    // Remember the basic blocks which define new values for the alloca
+    DefiningBlocks.push_back(SI.getParent());
+    AllocaPointerVal = SI.getOperand(0);
+    OnlyStore = &SI;
+    return true;
+  }
+
+  bool visitBitCastInst(BitCastInst &BC) {
+    if (BC.use_empty())
+      DeadInsts.push_back(&BC);
+    else
+      enqueueUsers(BC);
+    return true;
+  }
+
+  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    if (GEPI.use_empty()) {
+      DeadInsts.push_back(&GEPI);
+      return true;
     }
+
+    enqueueUsers(GEPI);
+
+    return GEPI.hasAllZeroIndices();
   }
 
-  return true;
-}
+  // We can promote through debug info intrinsics as they don't alter the
+  // value stored in memory.
+  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {
+    DeadInsts.push_back(&I);
+    return true;
+  }
 
-namespace {
-  struct AllocaInfo;
-
-  // Data package used by RenamePass()
-  class RenamePassData {
-  public:
-    typedef std::vector<Value *> ValVector;
-    
-    RenamePassData() : BB(NULL), Pred(NULL), Values() {}
-    RenamePassData(BasicBlock *B, BasicBlock *P,
-                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
-    BasicBlock *BB;
-    BasicBlock *Pred;
-    ValVector Values;
-    
-    void swap(RenamePassData &RHS) {
-      std::swap(BB, RHS.BB);
-      std::swap(Pred, RHS.Pred);
-      Values.swap(RHS.Values);
+  bool visitIntrinsicInst(IntrinsicInst &II) {
+    switch (II.getIntrinsicID()) {
+    default:
+      return false;
+
+      // Lifetime intrinsics don't preclude promoting the memory to a register.
+      // FIXME: We should use these to promote to undef when outside of a valid
+      // lifetime.
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      DeadInsts.push_back(&II);
+      return true;
     }
-  };
-  
-  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
-  /// load/store instructions in the block that directly load or store an alloca.
+  }
+
+  // The fallback is that the alloca cannot be promoted.
+  bool visitInstruction(Instruction &I) { return false; }
+};
+
+// Data package used by RenamePass()
+class RenamePassData {
+public:
+  typedef std::vector<Value *> ValVector;
+
+  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
+      : BB(B), Pred(P), Values(V) {}
+  BasicBlock *BB;
+  BasicBlock *Pred;
+  ValVector Values;
+
+  void swap(RenamePassData &RHS) {
+    std::swap(BB, RHS.BB);
+    std::swap(Pred, RHS.Pred);
+    Values.swap(RHS.Values);
+  }
+};
+
+/// \brief This assigns and keeps a per-bb relative ordering of load/store
+/// instructions in the block that directly load or store an alloca.
+///
+/// This functionality is important because it avoids scanning large basic
+/// blocks multiple times when promoting many allocas in the same block.
+class LargeBlockInfo {
+  /// \brief For each instruction that we track, keep the index of the
+  /// instruction.
   ///
-  /// This functionality is important because it avoids scanning large basic
-  /// blocks multiple times when promoting many allocas in the same block.
-  class LargeBlockInfo {
-    /// InstNumbers - For each instruction that we track, keep the index of the
-    /// instruction.  The index starts out as the number of the instruction from
-    /// the start of the block.
-    DenseMap<const Instruction *, unsigned> InstNumbers;
-  public:
-    
-    /// isInterestingInstruction - This code only looks at accesses to allocas.
-    static bool isInterestingInstruction(const Instruction *I) {
-      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
-             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
-    }
-    
-    /// getInstructionIndex - Get or calculate the index of the specified
-    /// instruction.
-    unsigned getInstructionIndex(const Instruction *I) {
-      assert(isInterestingInstruction(I) &&
-             "Not a load/store to/from an alloca?");
-      
-      // If we already have this instruction number, return it.
-      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
-      if (It != InstNumbers.end()) return It->second;
-      
-      // Scan the whole block to get the instruction.  This accumulates
-      // information for every interesting instruction in the block, in order to
-      // avoid gratuitus rescans.
-      const BasicBlock *BB = I->getParent();
-      unsigned InstNo = 0;
-      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
-           BBI != E; ++BBI)
-        if (isInterestingInstruction(BBI))
-          InstNumbers[BBI] = InstNo++;
-      It = InstNumbers.find(I);
-      
-      assert(It != InstNumbers.end() && "Didn't insert instruction?");
+  /// The index starts out as the number of the instruction from the start of
+  /// the block.
+  DenseMap<const Instruction *, unsigned> InstNumbers;
+
+public:
+
+  /// This code only looks at accesses to allocas.
+  static bool isInterestingInstruction(const Instruction *I) {
+    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+  }
+
+  /// Get or calculate the index of the specified instruction.
+  unsigned getInstructionIndex(const Instruction *I) {
+    assert(isInterestingInstruction(I) &&
+           "Not a load/store to/from an alloca?");
+
+    // If we already have this instruction number, return it.
+    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+    if (It != InstNumbers.end())
       return It->second;
-    }
-    
-    void deleteValue(const Instruction *I) {
-      InstNumbers.erase(I);
-    }
-    
-    void clear() {
-      InstNumbers.clear();
-    }
-  };
-
-  struct PromoteMem2Reg {
-    /// Allocas - The alloca instructions being promoted.
-    ///
-    std::vector<AllocaInst*> Allocas;
-    DominatorTree &DT;
-    DIBuilder *DIB;
-
-    /// AST - An AliasSetTracker object to update.  If null, don't update it.
-    ///
-    AliasSetTracker *AST;
-    
-    /// AllocaLookup - Reverse mapping of Allocas.
-    ///
-    DenseMap<AllocaInst*, unsigned>  AllocaLookup;
-
-    /// NewPhiNodes - The PhiNodes we're adding.  That map is used to simplify
-    /// some Phi nodes as we iterate over it, so it should have deterministic
-    /// iterators.  We could use a MapVector, but since we already maintain a
-    /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
-    /// more efficient (also supports removal).
-    ///
-    DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
-    
-    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
-    /// it corresponds to.
-    DenseMap<PHINode*, unsigned> PhiToAllocaMap;
-    
-    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
-    /// each alloca that is of pointer type, we keep track of what to copyValue
-    /// to the inserted PHI nodes here.
-    ///
-    std::vector<Value*> PointerAllocaValues;
-
-    /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
-    /// intrinsic that describes it, if any, so that we can convert it to a
-    /// dbg.value intrinsic if the alloca gets promoted.
-    SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
-
-    /// Visited - The set of basic blocks the renamer has already visited.
-    ///
-    SmallPtrSet<BasicBlock*, 16> Visited;
-
-    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
-    /// non-determinstic behavior.
-    DenseMap<BasicBlock*, unsigned> BBNumbers;
-
-    /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
-    DenseMap<DomTreeNode*, unsigned> DomLevels;
-
-    /// BBNumPreds - Lazily compute the number of predecessors a block has.
-    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
-  public:
-    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
-                   AliasSetTracker *ast)
-      : Allocas(A), DT(dt), DIB(0), AST(ast) {}
-    ~PromoteMem2Reg() {
-      delete DIB;
-    }
 
-    void run();
+    // Scan the whole block to get the instruction.  This accumulates
+    // information for every interesting instruction in the block, in order to
+    // avoid gratuitous rescans.
+    const BasicBlock *BB = I->getParent();
+    unsigned InstNo = 0;
+    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
+         ++BBI)
+      if (isInterestingInstruction(BBI))
+        InstNumbers[BBI] = InstNo++;
+    It = InstNumbers.find(I);
+
+    assert(It != InstNumbers.end() && "Didn't insert instruction?");
+    return It->second;
+  }
 
-    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
-    ///
-    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
-      return DT.dominates(BB1, BB2);
-    }
+  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
 
-  private:
-    void RemoveFromAllocasList(unsigned &AllocaIdx) {
-      Allocas[AllocaIdx] = Allocas.back();
-      Allocas.pop_back();
-      --AllocaIdx;
-    }
+  void clear() { InstNumbers.clear(); }
+};
+
+struct PromoteMem2Reg {
+  /// The alloca instructions being promoted.
+  std::vector<AllocaInst *> Allocas;
+  DominatorTree &DT;
+  DIBuilder DIB;
+  const DataLayout *DL;
+
+  /// An AliasSetTracker object to update.  If null, don't update it.
+  AliasSetTracker *AST;
+
+  /// Reverse mapping of Allocas.
+  DenseMap<AllocaInst *, unsigned> AllocaLookup;
+
+  /// \brief The PhiNodes we're adding.
+  ///
+  /// That map is used to simplify some Phi nodes as we iterate over it, so
+  /// it should have deterministic iterators.  We could use a MapVector, but
+  /// since we already maintain a map from BasicBlock* to a stable numbering
+  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+  /// For each PHI node, keep track of which entry in Allocas it corresponds
+  /// to.
+  DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+  /// If we are updating an AliasSetTracker, then for each alloca that is of
+  /// pointer type, we keep track of what to copyValue to the inserted PHI
+  /// nodes here.
+  std::vector<Value *> PointerAllocaValues;
+
+  /// For each alloca, we keep track of the dbg.declare intrinsic that
+  /// describes it, if any, so that we can convert it to a dbg.value
+  /// intrinsic if the alloca gets promoted.
+  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+
+  /// The set of basic blocks the renamer has already visited.
+  ///
+  SmallPtrSet<BasicBlock *, 16> Visited;
+
+  /// Contains a stable numbering of basic blocks to avoid non-deterministic
+  /// behavior.
+  DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+  /// Maps DomTreeNodes to their level in the dominator tree.
+  DenseMap<DomTreeNode *, unsigned> DomLevels;
+
+  /// Lazily compute the number of predecessors a block has.
+  DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+                 const DataLayout *DL, AliasSetTracker *AST)
+      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+        DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {}
+
+  void run();
+
+private:
+  void RemoveFromAllocasList(unsigned &AllocaIdx) {
+    Allocas[AllocaIdx] = Allocas.back();
+    Allocas.pop_back();
+    --AllocaIdx;
+  }
+
+  unsigned getNumPreds(const BasicBlock *BB) {
+    unsigned &NP = BBNumPreds[BB];
+    if (NP == 0)
+      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+    return NP - 1;
+  }
 
-    unsigned getNumPreds(const BasicBlock *BB) {
-      unsigned &NP = BBNumPreds[BB];
-      if (NP == 0)
-        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
-      return NP-1;
+  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+                               AllocaInfo &Info);
+  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+                           const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+                           SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+                  RenamePassData::ValVector &IncVals,
+                  std::vector<RenamePassData> &Worklist);
+  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end of anonymous namespace
+
+/// \brief Walk a small vector of dead instructions and recursively remove them
+/// and subsequently dead instructions.
+///
+/// This is only valid to call on dead instructions using an alloca which is
+/// promotable, as we leverage that assumption to delete them faster.
+static void removeDeadInstructions(AllocaInst *AI,
+                                   SmallVectorImpl<Instruction *> &DeadInsts) {
+  while (!DeadInsts.empty()) {
+    Instruction *I = DeadInsts.pop_back_val();
+
+    // Don't delete the alloca itself.
+    if (I == AI)
+      continue;
+
+    // Note that we open code the deletion algorithm here because we know
+    // a priori that all of the instructions using an alloca that reaches here
+    // are trivially dead when their use list becomes empty (the only risk is
+    // lifetime markers, which we specifically want to nuke). By coding it here
+    // we can skip the triviality test and be more efficient.
+    //
+    // Null out all of the instruction's operands to see if any operand becomes
+    // dead as we go.
+    for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
+         ++OI) {
+      Instruction *Op = dyn_cast<Instruction>(*OI);
+      if (!Op)
+        continue;
+
+      OI->set(0);
+      if (!Op->use_empty())
+        continue;
+
+      DeadInsts.push_back(Op);
     }
+    I->eraseFromParent();
+  }
+}
+
+/// \brief Rewrite as many loads as possible given a single store.
+///
+/// When there is only a single store, we can use the domtree to trivially
+/// replace all of the dominated loads with the stored value. Do so, and return
+/// true if this has successfully promoted the alloca entirely. If this returns
+/// false there were some loads which were not dominated by the single store
+/// and thus must be phi-ed with undef. We fall back to the standard alloca
+/// promotion algorithm in that case.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+                                     LargeBlockInfo &LBI,
+                                     DominatorTree &DT,
+                                     AliasSetTracker *AST) {
+  StoreInst *OnlyStore = Info.OnlyStore;
+  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+  BasicBlock *StoreBB = OnlyStore->getParent();
+  int StoreIndex = -1;
+
+  // Clear out UsingBlocks.  We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
 
-    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
-                                 AllocaInfo &Info);
-    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 
-                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
-                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
-    
-    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                  LargeBlockInfo &LBI);
-    void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                  LargeBlockInfo &LBI);
-    
-    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
-                    RenamePassData::ValVector &IncVals,
-                    std::vector<RenamePassData> &Worklist);
-    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
-  };
-  
-  struct AllocaInfo {
-    SmallVector<BasicBlock*, 32> DefiningBlocks;
-    SmallVector<BasicBlock*, 32> UsingBlocks;
-    
-    StoreInst  *OnlyStore;
-    BasicBlock *OnlyBlock;
-    bool OnlyUsedInOneBlock;
-    
-    Value *AllocaPointerVal;
-    DbgDeclareInst *DbgDeclare;
-    
-    void clear() {
-      DefiningBlocks.clear();
-      UsingBlocks.clear();
-      OnlyStore = 0;
-      OnlyBlock = 0;
-      OnlyUsedInOneBlock = true;
-      AllocaPointerVal = 0;
-      DbgDeclare = 0;
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+    Instruction *UserInst = cast<Instruction>(*UI++);
+    if (!isa<LoadInst>(UserInst)) {
+      assert(UserInst == OnlyStore && "Should only have load/stores");
+      continue;
     }
-    
-    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
-    /// ivars.
-    void AnalyzeAlloca(AllocaInst *AI) {
-      clear();
-
-      // As we scan the uses of the alloca instruction, keep track of stores,
-      // and decide whether all of the loads and stores to the alloca are within
-      // the same basic block.
-      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
-           UI != E;)  {
-        Instruction *User = cast<Instruction>(*UI++);
-
-        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-          // Remember the basic blocks which define new values for the alloca
-          DefiningBlocks.push_back(SI->getParent());
-          AllocaPointerVal = SI->getOperand(0);
-          OnlyStore = SI;
-        } else {
-          LoadInst *LI = cast<LoadInst>(User);
-          // Otherwise it must be a load instruction, keep track of variable
-          // reads.
-          UsingBlocks.push_back(LI->getParent());
-          AllocaPointerVal = LI;
-        }
-        
-        if (OnlyUsedInOneBlock) {
-          if (OnlyBlock == 0)
-            OnlyBlock = User->getParent();
-          else if (OnlyBlock != User->getParent())
-            OnlyUsedInOneBlock = false;
+    LoadInst *LI = cast<LoadInst>(UserInst);
+
+    // Okay, if we have a load from the alloca, we want to replace it with the
+    // only value stored to the alloca.  We can do this if the value is
+    // dominated by the store.  If not, we use the rest of the mem2reg machinery
+    // to insert the phi nodes as needed.
+    if (!StoringGlobalVal) { // Non-instructions are always dominated.
+      if (LI->getParent() == StoreBB) {
+        // If we have a use that is in the same block as the store, compare the
+        // indices of the two instructions to see which one came first.  If the
+        // load came before the store, we can't handle it.
+        if (StoreIndex == -1)
+          StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+          // Can't handle this load, bail out.
+          Info.UsingBlocks.push_back(StoreBB);
+          continue;
         }
+
+      } else if (LI->getParent() != StoreBB &&
+                 !DT.dominates(StoreBB, LI->getParent())) {
+        // If the load and store are in different blocks, use BB dominance to
+        // check their relationships.  If the store doesn't dominate the use,
+        // bail out.
+        Info.UsingBlocks.push_back(LI->getParent());
+        continue;
       }
-      
-      DbgDeclare = FindAllocaDbgDeclare(AI);
     }
-  };
 
-  typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+    // Otherwise, we *can* safely rewrite this load.
+    Value *ReplVal = OnlyStore->getOperand(0);
+    // If the replacement value is the load, this must occur in unreachable
+    // code.
+    if (ReplVal == LI)
+      ReplVal = UndefValue::get(LI->getType());
+    LI->replaceAllUsesWith(ReplVal);
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+
+  // Finally, after the scan, check to see if the store is all that is left.
+  if (!Info.UsingBlocks.empty())
+    return false; // If not, we'll have to fall back for the remainder.
 
-  struct DomTreeNodeCompare {
-    bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
-      return LHS.second < RHS.second;
-    }
-  };
-}  // end of anonymous namespace
-
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
-  // Knowing that this alloca is promotable, we know that it's safe to kill all
-  // instructions except for load and store.
-
-  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE;) {
-    Instruction *I = cast<Instruction>(*UI);
-    ++UI;
-    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+  // Record debuginfo for the store and remove the declaration's
+  // debuginfo.
+  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
+    DDI->eraseFromParent();
+  }
+  // Remove the (now dead) store and alloca.
+  Info.OnlyStore->eraseFromParent();
+  LBI.deleteValue(Info.OnlyStore);
+
+  if (AST)
+    AST->deleteValue(AI);
+  AI->eraseFromParent();
+  LBI.deleteValue(AI);
+  return true;
+}
+
+namespace {
+/// This is a helper predicate used to search by the first element of a pair.
+struct StoreIndexSearchPredicate {
+  bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
+                  const std::pair<unsigned, StoreInst *> &RHS) {
+    return LHS.first < RHS.first;
+  }
+};
+}
+
+/// Many allocas are only used within a single basic block.  If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// This function returns nothing and always promotes: a load with no store
+/// above it in the block is rewritten to undef rather than causing a bail-out.
+/// NOTE(review): inside a loop such a load may observe a store from an earlier
+/// iteration, so confirm undef is intended.  e.g. code like this is valid:
+///
+///   for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+                                     LargeBlockInfo &LBI,
+                                     AliasSetTracker *AST) {
+  // The trickiest case to handle is when we have large blocks. Because of this,
+  // this code is optimized assuming that large blocks happen.  This does not
+  // significantly pessimize the small block case.  This uses LargeBlockInfo to
+  // make it efficient to get the index of various operations in the block.
+
+  // Walk the use-def list of the alloca, getting the locations of all stores.
+  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+  StoresByIndexTy StoresByIndex;
+
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
+       ++UI)
+    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+  // Sort the stores by their index, making it efficient to do a lookup with a
+  // binary search.
+  std::sort(StoresByIndex.begin(), StoresByIndex.end(),
+            StoreIndexSearchPredicate());
+
+  // Walk all of the loads from this alloca, replacing them with the nearest
+  // store above them, if any.
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+    if (!LI)
       continue;
 
-    if (!I->getType()->isVoidTy()) {
-      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
-      // Follow the use/def chain to erase them now instead of leaving it for
-      // dead code elimination later.
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE;) {
-        Instruction *Inst = cast<Instruction>(*UI);
-        ++UI;
-        Inst->eraseFromParent();
-      }
+    unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+    // Find the nearest store that has a lower index than this load.
+    StoresByIndexTy::iterator I =
+        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+                         StoreIndexSearchPredicate());
+
+    if (I == StoresByIndex.begin())
+      // If there is no store before this load, the load takes the undef value.
+      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+    else
+      // Otherwise, the load takes the value of the nearest store above it.
+      LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
+
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+
+  // Remove the (now dead) stores and alloca.
+  while (!AI->use_empty()) {
+    StoreInst *SI = cast<StoreInst>(AI->use_back());
+    // Record debuginfo for the store before removing it.
+    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
     }
-    I->eraseFromParent();
+    SI->eraseFromParent();
+    LBI.deleteValue(SI);
   }
+
+  if (AST)
+    AST->deleteValue(AI);
+  AI->eraseFromParent();
+  LBI.deleteValue(AI);
+
+  // The alloca's debuginfo can be removed as well.
+  if (DbgDeclareInst *DDI = Info.DbgDeclare)
+    DDI->eraseFromParent();
+
+  ++NumLocalPromoted;
 }
 
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
 
-  if (AST) PointerAllocaValues.resize(Allocas.size());
+  if (AST)
+    PointerAllocaValues.resize(Allocas.size());
   AllocaDbgDeclares.resize(Allocas.size());
 
-  AllocaInfo Info;
+  AllocaInfo Info(DL);
   LargeBlockInfo LBI;
 
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
 
-    assert(isAllocaPromotable(AI) &&
-           "Cannot promote non-promotable alloca!");
     assert(AI->getParent()->getParent() == &F &&
            "All allocas should be in the same function, which is same as DF!");
 
-    removeLifetimeIntrinsicUsers(AI);
+    // Calculate the set of read and write-locations for each alloca.  This is
+    // analogous to finding the 'uses' and 'definitions' of each variable.
+    bool Good = Info.analyzeAlloca(*AI);
+    (void)Good;
+    assert(Good && "Cannot promote non-promotable alloca!");
+
+    // Nuke all of the dead instructions.
+    removeDeadInstructions(AI, Info.DeadInsts);
 
     if (AI->use_empty()) {
       // If there are no uses of the alloca, just delete it now.
-      if (AST) AST->deleteValue(AI);
+      if (AST)
+        AST->deleteValue(AI);
       AI->eraseFromParent();
 
       // Remove the alloca from the Allocas list, since it has been processed
@@ -417,83 +619,31 @@ void PromoteMem2Reg::run() {
       ++NumDeadAlloca;
       continue;
     }
-    
-    // Calculate the set of read and write-locations for each alloca.  This is
-    // analogous to finding the 'uses' and 'definitions' of each variable.
-    Info.AnalyzeAlloca(AI);
 
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      RewriteSingleStoreAlloca(AI, Info, LBI);
-
-      // Finally, after the scan, check to see if the store is all that is left.
-      if (Info.UsingBlocks.empty()) {
-        // Record debuginfo for the store and remove the declaration's 
-        // debuginfo.
-        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
-          if (!DIB)
-            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
-          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
-          DDI->eraseFromParent();
-        }
-        // Remove the (now dead) store and alloca.
-        Info.OnlyStore->eraseFromParent();
-        LBI.deleteValue(Info.OnlyStore);
-
-        if (AST) AST->deleteValue(AI);
-        AI->eraseFromParent();
-        LBI.deleteValue(AI);
-        
+      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
         // The alloca has been processed, move on.
         RemoveFromAllocasList(AllocaNum);
-        
         ++NumSingleStore;
         continue;
       }
     }
-    
+
     // If the alloca is only read and written in one basic block, just perform a
     // linear sweep over the block to eliminate it.
     if (Info.OnlyUsedInOneBlock) {
-      PromoteSingleBlockAlloca(AI, Info, LBI);
-      
-      // Finally, after the scan, check to see if the stores are all that is
-      // left.
-      if (Info.UsingBlocks.empty()) {
-        
-        // Remove the (now dead) stores and alloca.
-        while (!AI->use_empty()) {
-          StoreInst *SI = cast<StoreInst>(AI->use_back());
-          // Record debuginfo for the store before removing it.
-          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
-            if (!DIB)
-              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
-            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
-          }
-          SI->eraseFromParent();
-          LBI.deleteValue(SI);
-        }
-        
-        if (AST) AST->deleteValue(AI);
-        AI->eraseFromParent();
-        LBI.deleteValue(AI);
-        
-        // The alloca has been processed, move on.
-        RemoveFromAllocasList(AllocaNum);
-        
-        // The alloca's debuginfo can be removed as well.
-        if (DbgDeclareInst *DDI = Info.DbgDeclare)
-          DDI->eraseFromParent();
+      promoteSingleBlockAlloca(AI, Info, LBI, AST);
 
-        ++NumLocalPromoted;
-        continue;
-      }
+      // The alloca has been processed, move on.
+      RemoveFromAllocasList(AllocaNum);
+      continue;
     }
 
     // If we haven't computed dominator tree levels, do so now.
     if (DomLevels.empty()) {
-      SmallVector<DomTreeNode*, 32> Worklist;
+      SmallVector<DomTreeNode *, 32> Worklist;
 
       DomTreeNode *Root = DT.getRootNode();
       DomLevels[Root] = 0;
@@ -522,10 +672,11 @@ void PromoteMem2Reg::run() {
     // stored into the alloca.
     if (AST)
       PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-      
+
     // Remember the dbg.declare intrinsic describing this alloca, if any.
-    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
-    
+    if (Info.DbgDeclare)
+      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
     // Keep the reverse mapping of the 'Allocas' array for the rename pass.
     AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
 
@@ -540,8 +691,7 @@ void PromoteMem2Reg::run() {
     return; // All of the allocas must have been trivial!
 
   LBI.clear();
-  
-  
+
   // Set the incoming values for the basic block to be null values for all of
   // the alloca's.  We do this in case there is a load of a value that has not
   // been stored yet.  In this case, it will get this null value.
@@ -562,7 +712,7 @@ void PromoteMem2Reg::run() {
     // RenamePass may add new worklist entries.
     RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
   } while (!RenamePassWorkList.empty());
-  
+
   // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
   Visited.clear();
 
@@ -575,7 +725,8 @@ void PromoteMem2Reg::run() {
     // tree. Just delete the users now.
     if (!A->use_empty())
       A->replaceAllUsesWith(UndefValue::get(A->getType()));
-    if (AST) AST->deleteValue(A);
+    if (AST)
+      AST->deleteValue(A);
     A->eraseFromParent();
   }
 
@@ -591,13 +742,15 @@ void PromoteMem2Reg::run() {
   bool EliminatedAPHI = true;
   while (EliminatedAPHI) {
     EliminatedAPHI = false;
-    
+
     // Iterating over NewPhiNodes is deterministic, so it is safe to try to
     // simplify and RAUW them as we go.  If it was not, we could add uses to
     // the values we replace with in a non deterministic order, thus creating
     // non deterministic def->use chains.
-    for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
-           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+             I = NewPhiNodes.begin(),
+             E = NewPhiNodes.end();
+         I != E;) {
       PHINode *PN = I->second;
 
       // If this PHI node merges one value and/or undefs, get the value.
@@ -613,15 +766,17 @@ void PromoteMem2Reg::run() {
       ++I;
     }
   }
-  
+
   // At this point, the renamer has added entries to PHI nodes for all reachable
   // code.  Unfortunately, there may be unreachable blocks which the renamer
   // hasn't traversed.  If this is the case, the PHI nodes may not
   // have incoming values for all predecessors.  Loop over all PHI nodes we have
   // created, inserting undef values if they are missing any incoming values.
   //
-  for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
-         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+           I = NewPhiNodes.begin(),
+           E = NewPhiNodes.end();
+       I != E; ++I) {
     // We want to do this once per basic block.  As such, only process a block
     // when we find the PHI that is the first entry in the block.
     PHINode *SomePHI = I->second;
@@ -636,21 +791,20 @@ void PromoteMem2Reg::run() {
       continue;
 
     // Get the preds for BB.
-    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
-    
+    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+
     // Ok, now we know that all of the PHI nodes are missing entries for some
     // basic blocks.  Start by sorting the incoming predecessors for efficient
     // access.
     std::sort(Preds.begin(), Preds.end());
-    
+
     // Now we loop through all BB's which have entries in SomePHI and remove
     // them from the Preds list.
     for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
       // Do a log(n) search of the Preds list for the entry we want.
-      SmallVector<BasicBlock*, 16>::iterator EntIt =
-        std::lower_bound(Preds.begin(), Preds.end(),
-                         SomePHI->getIncomingBlock(i));
-      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
+          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
              "PHI node has entry for a block which is not a predecessor!");
 
       // Remove the entry
@@ -670,39 +824,41 @@ void PromoteMem2Reg::run() {
         SomePHI->addIncoming(UndefVal, Preds[pred]);
     }
   }
-        
+
   NewPhiNodes.clear();
 }
 
+/// \brief Determine which blocks the value is live in.
+///
+/// These are blocks which lead to uses.  Knowing this allows us to avoid
+/// inserting PHI nodes into blocks which don't lead to uses (thus, the
+/// inserted phi nodes would be dead).
+void PromoteMem2Reg::ComputeLiveInBlocks(
+    AllocaInst *AI, AllocaInfo &Info,
+    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
 
-/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
-/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
-/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
-/// would be dead).
-void PromoteMem2Reg::
-ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 
-                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
-                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
-  
   // To determine liveness, we must iterate through the predecessors of blocks
   // where the def is live.  Blocks are added to the worklist if we need to
   // check their predecessors.  Start with all the using blocks.
-  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
-                                                   Info.UsingBlocks.end());
-  
+  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+                                                    Info.UsingBlocks.end());
+
   // If any of the using blocks is also a definition block, check to see if the
   // definition occurs before or after the use.  If it happens before the use,
   // the value isn't really live-in.
   for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
     BasicBlock *BB = LiveInBlockWorklist[i];
-    if (!DefBlocks.count(BB)) continue;
-    
+    if (!DefBlocks.count(BB))
+      continue;
+
     // Okay, this is a block that both uses and defines the value.  If the first
     // reference to the alloca is a def (store), then we know it isn't live-in.
-    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+    for (BasicBlock::iterator I = BB->begin();; ++I) {
       if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        if (SI->getOperand(1) != AI) continue;
-        
+        if (SI->getOperand(1) != AI)
+          continue;
+
         // We found a store to the alloca before a load.  The alloca is not
         // actually live-in here.
         LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
@@ -710,73 +866,86 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
         --i, --e;
         break;
       }
-      
+
       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        if (LI->getOperand(0) != AI) continue;
-        
+        if (LI->getOperand(0) != AI)
+          continue;
+
         // Okay, we found a load before a store to the alloca.  It is actually
         // live into this block.
         break;
       }
     }
   }
-  
+
   // Now that we have a set of blocks where the phi is live-in, recursively add
   // their predecessors until we find the full region the value is live.
   while (!LiveInBlockWorklist.empty()) {
     BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-    
+
     // The block really is live in here, insert it into the set.  If already in
     // the set, then it has already been processed.
     if (!LiveInBlocks.insert(BB))
       continue;
-    
+
     // Since the value is live into BB, it is either defined in a predecessor or
     // live into it to.  Add the preds to the worklist unless they are a
     // defining block.
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
       BasicBlock *P = *PI;
-      
+
       // The value is not live into a predecessor if it defines the value.
       if (DefBlocks.count(P))
         continue;
-      
+
       // Otherwise it is, add to the worklist.
       LiveInBlockWorklist.push_back(P);
     }
   }
 }
 
-/// DetermineInsertionPoint - At this point, we're committed to promoting the
-/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
-/// which blocks need phi nodes and see if we can optimize out some work by
-/// avoiding insertion of dead phi nodes.
+namespace {
+typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+
+struct DomTreeNodeCompare {
+  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+    return LHS.second < RHS.second;
+  }
+};
+} // end anonymous namespace
+
+/// At this point, we're committed to promoting the alloca using IDF's, and the
+/// standard SSA construction algorithm.  Determine which blocks need phi nodes
+/// and see if we can optimize out some work by avoiding insertion of dead phi
+/// nodes.
 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                              AllocaInfo &Info) {
   // Unique the set of defining blocks for efficient lookup.
-  SmallPtrSet<BasicBlock*, 32> DefBlocks;
+  SmallPtrSet<BasicBlock *, 32> DefBlocks;
   DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
 
   // Determine which blocks the value is live in.  These are blocks which lead
   // to uses.
-  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
   ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
 
   // Use a priority queue keyed on dominator tree level so that inserted nodes
   // are handled from the bottom of the dominator tree upwards.
-  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+  typedef std::priority_queue<DomTreeNodePair,
+                              SmallVector<DomTreeNodePair, 32>,
                               DomTreeNodeCompare> IDFPriorityQueue;
   IDFPriorityQueue PQ;
 
-  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
-       E = DefBlocks.end(); I != E; ++I) {
+  for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
+                                                     E = DefBlocks.end();
+       I != E; ++I) {
     if (DomTreeNode *Node = DT.getNode(*I))
       PQ.push(std::make_pair(Node, DomLevels[Node]));
   }
 
-  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
-  SmallPtrSet<DomTreeNode*, 32> Visited;
-  SmallVector<DomTreeNode*, 32> Worklist;
+  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
+  SmallPtrSet<DomTreeNode *, 32> Visited;
+  SmallVector<DomTreeNode *, 32> Worklist;
   while (!PQ.empty()) {
     DomTreeNodePair RootPair = PQ.top();
     PQ.pop();
@@ -836,179 +1005,22 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
     QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
 }
 
-/// RewriteSingleStoreAlloca - If there is only a single store to this value,
-/// replace any loads of it that are directly dominated by the definition with
-/// the value stored.
-void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
-                                              AllocaInfo &Info,
-                                              LargeBlockInfo &LBI) {
-  StoreInst *OnlyStore = Info.OnlyStore;
-  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
-  BasicBlock *StoreBB = OnlyStore->getParent();
-  int StoreIndex = -1;
-
-  // Clear out UsingBlocks.  We will reconstruct it here if needed.
-  Info.UsingBlocks.clear();
-  
-  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
-    Instruction *UserInst = cast<Instruction>(*UI++);
-    if (!isa<LoadInst>(UserInst)) {
-      assert(UserInst == OnlyStore && "Should only have load/stores");
-      continue;
-    }
-    LoadInst *LI = cast<LoadInst>(UserInst);
-    
-    // Okay, if we have a load from the alloca, we want to replace it with the
-    // only value stored to the alloca.  We can do this if the value is
-    // dominated by the store.  If not, we use the rest of the mem2reg machinery
-    // to insert the phi nodes as needed.
-    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
-      if (LI->getParent() == StoreBB) {
-        // If we have a use that is in the same block as the store, compare the
-        // indices of the two instructions to see which one came first.  If the
-        // load came before the store, we can't handle it.
-        if (StoreIndex == -1)
-          StoreIndex = LBI.getInstructionIndex(OnlyStore);
-
-        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
-          // Can't handle this load, bail out.
-          Info.UsingBlocks.push_back(StoreBB);
-          continue;
-        }
-        
-      } else if (LI->getParent() != StoreBB &&
-                 !dominates(StoreBB, LI->getParent())) {
-        // If the load and store are in different blocks, use BB dominance to
-        // check their relationships.  If the store doesn't dom the use, bail
-        // out.
-        Info.UsingBlocks.push_back(LI->getParent());
-        continue;
-      }
-    }
-    
-    // Otherwise, we *can* safely rewrite this load.
-    Value *ReplVal = OnlyStore->getOperand(0);
-    // If the replacement value is the load, this must occur in unreachable
-    // code.
-    if (ReplVal == LI)
-      ReplVal = UndefValue::get(LI->getType());
-    LI->replaceAllUsesWith(ReplVal);
-    if (AST && LI->getType()->isPointerTy())
-      AST->deleteValue(LI);
-    LI->eraseFromParent();
-    LBI.deleteValue(LI);
-  }
-}
-
-namespace {
-
-/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
-/// first element of a pair.
-struct StoreIndexSearchPredicate {
-  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
-                  const std::pair<unsigned, StoreInst*> &RHS) {
-    return LHS.first < RHS.first;
-  }
-};
-
-}
-
-/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
-/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
-/// potentially useless PHI nodes by just performing a single linear pass over
-/// the basic block using the Alloca.
-///
-/// If we cannot promote this alloca (because it is read before it is written),
-/// return true.  This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths.  e.g. code like
-/// this is potentially correct:
-///
-///   for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
+/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
 ///
-void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                              LargeBlockInfo &LBI) {
-  // The trickiest case to handle is when we have large blocks. Because of this,
-  // this code is optimized assuming that large blocks happen.  This does not
-  // significantly pessimize the small block case.  This uses LargeBlockInfo to
-  // make it efficient to get the index of various operations in the block.
-  
-  // Clear out UsingBlocks.  We will reconstruct it here if needed.
-  Info.UsingBlocks.clear();
-  
-  // Walk the use-def list of the alloca, getting the locations of all stores.
-  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
-  StoresByIndexTy StoresByIndex;
-  
-  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
-       UI != E; ++UI) 
-    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
-      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
-
-  // If there are no stores to the alloca, just replace any loads with undef.
-  if (StoresByIndex.empty()) {
-    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 
-      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
-        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
-        if (AST && LI->getType()->isPointerTy())
-          AST->deleteValue(LI);
-        LBI.deleteValue(LI);
-        LI->eraseFromParent();
-      }
-    return;
-  }
-  
-  // Sort the stores by their index, making it efficient to do a lookup with a
-  // binary search.
-  std::sort(StoresByIndex.begin(), StoresByIndex.end());
-  
-  // Walk all of the loads from this alloca, replacing them with the nearest
-  // store above them, if any.
-  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
-    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
-    if (!LI) continue;
-    
-    unsigned LoadIdx = LBI.getInstructionIndex(LI);
-    
-    // Find the nearest store that has a lower than this load. 
-    StoresByIndexTy::iterator I = 
-      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
-                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
-                       StoreIndexSearchPredicate());
-    
-    // If there is no store before this load, then we can't promote this load.
-    if (I == StoresByIndex.begin()) {
-      // Can't handle this load, bail out.
-      Info.UsingBlocks.push_back(LI->getParent());
-      continue;
-    }
-      
-    // Otherwise, there was a store before this load, the load takes its value.
-    --I;
-    LI->replaceAllUsesWith(I->second->getOperand(0));
-    if (AST && LI->getType()->isPointerTy())
-      AST->deleteValue(LI);
-    LI->eraseFromParent();
-    LBI.deleteValue(LI);
-  }
-}
-
-// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
-// Alloca returns true if there wasn't already a phi-node for that variable
-//
+/// Returns true if there wasn't already a phi-node for that variable.
 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                   unsigned &Version) {
   // Look up the basic-block in question.
   PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
 
   // If the BB already has a phi node added for the i'th alloca then we're done!
-  if (PN) return false;
+  if (PN)
+    return false;
 
   // Create a PhiNode using the dereferenced type... and add the phi-node to the
   // BasicBlock.
   PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
-                       Allocas[AllocaNo]->getName() + "." + Twine(Version++), 
+                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                        BB->begin());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
@@ -1019,10 +1031,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
   return true;
 }
 
-// RenamePass - Recursively traverse the CFG of the function, renaming loads and
-// stores to the allocas which we are promoting.  IncomingVals indicates what
-// value each Alloca contains on exit from the predecessor block Pred.
-//
+/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                 RenamePassData::ValVector &IncomingVals,
                                 std::vector<RenamePassData> &Worklist) {
@@ -1040,48 +1053,49 @@ NextIteration:
       // inserted by this pass of mem2reg will have the same number of incoming
       // operands so far.  Remember this count.
       unsigned NewPHINumOperands = APN->getNumOperands();
-      
-      unsigned NumEdges = 0;
-      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
-        if (*I == BB)
-          ++NumEdges;
+
+      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
       assert(NumEdges && "Must be at least one edge from Pred to BB!");
-      
+
       // Add entries for all the phis.
       BasicBlock::iterator PNI = BB->begin();
       do {
         unsigned AllocaNo = PhiToAllocaMap[APN];
-        
+
         // Add N incoming values to the PHI node.
         for (unsigned i = 0; i != NumEdges; ++i)
           APN->addIncoming(IncomingVals[AllocaNo], Pred);
-        
+
         // The currently active variable for this block is now the PHI.
         IncomingVals[AllocaNo] = APN;
-        
+
         // Get the next phi node.
         ++PNI;
         APN = dyn_cast<PHINode>(PNI);
-        if (APN == 0) break;
-        
+        if (APN == 0)
+          break;
+
         // Verify that it is missing entries.  If not, it is not being inserted
         // by this mem2reg invocation so we want to ignore it.
       } while (APN->getNumOperands() == NewPHINumOperands);
     }
   }
-  
+
   // Don't revisit blocks.
-  if (!Visited.insert(BB)) return;
+  if (!Visited.insert(BB))
+    return;
 
-  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
     Instruction *I = II++; // get the instruction, increment iterator
 
     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
-      if (!Src) continue;
-  
-      DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
-      if (AI == AllocaLookup.end()) continue;
+      if (!Src)
+        continue;
+
+      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+      if (AI == AllocaLookup.end())
+        continue;
 
       Value *V = IncomingVals[AI->second];
 
@@ -1094,30 +1108,29 @@ NextIteration:
       // Delete this instruction and mark the name as the current holder of the
       // value
       AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
-      if (!Dest) continue;
-      
+      if (!Dest)
+        continue;
+
       DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
       if (ai == AllocaLookup.end())
         continue;
-      
+
       // what value were we writing?
       IncomingVals[ai->second] = SI->getOperand(0);
       // Record debuginfo for the store before removing it.
-      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
-        if (!DIB)
-          DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
-        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
-      }
+      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
       BB->getInstList().erase(SI);
     }
   }
 
   // 'Recurse' to our successors.
   succ_iterator I = succ_begin(BB), E = succ_end(BB);
-  if (I == E) return;
+  if (I == E)
+    return;
 
   // Keep track of the successors so we don't visit the same successor twice
-  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
 
   // Handle the first successor without using the worklist.
   VisitedSuccs.insert(*I);
@@ -1132,18 +1145,19 @@ NextIteration:
   goto NextIteration;
 }
 
-/// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate.  This function does
-/// not modify the CFG of the function at all.  All allocas must be from the
-/// same function.
-///
-/// If AST is specified, the specified tracker is updated to reflect changes
-/// made to the IR.
-///
-void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
-                           DominatorTree &DT, AliasSetTracker *AST) {
+bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) {
+  // Re-use the promotion routine's non-const analysis by casting away const.
+  // Nothing is actually mutated at this phase, and the non-const results that
+  // promotion would use to mutate the alloca are discarded with Info.
+  AllocaInfo Info(DL);
+  return Info.analyzeAlloca(*const_cast<AllocaInst *>(AI));
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+                           const DataLayout *DL, AliasSetTracker *AST) {
   // If there is nothing to do, bail out...
-  if (Allocas.empty()) return;
+  if (Allocas.empty())
+    return;
 
-  PromoteMem2Reg(Allocas, DT, AST).run();
+  PromoteMem2Reg(Allocas, DT, DL, AST).run();
 }
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9d90fbe..fc85ef3 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -42,8 +42,6 @@ SSAUpdater::~SSAUpdater() {
   delete static_cast<AvailableValsTy*>(AV);
 }
 
-/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates with type 'Ty'.  PHI nodes get a name based on 'Name'.
 void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
   if (AV == 0)
     AV = new AvailableValsTy();
@@ -53,14 +51,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
   ProtoName = Name;
 }
 
-/// HasValueForBlock - Return true if the SSAUpdater already has a value for
-/// the specified block.
 bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
   return getAvailableVals(AV).count(BB);
 }
 
-/// AddAvailableValue - Indicate that a rewritten value is available in the
-/// specified block with the specified value.
 void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
   assert(ProtoType != 0 && "Need to initialize SSAUpdater");
   assert(ProtoType == V->getType() &&
@@ -68,8 +62,6 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
   getAvailableVals(AV)[BB] = V;
 }
 
-/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
-/// in ValueMapping for each predecessor block.
 static bool IsEquivalentPHI(PHINode *PHI,
                             DenseMap<BasicBlock*, Value*> &ValueMapping) {
   unsigned PHINumValues = PHI->getNumIncomingValues();
@@ -86,32 +78,11 @@ static bool IsEquivalentPHI(PHINode *PHI,
   return true;
 }
 
-/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
-/// live at the end of the specified block.
 Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
   Value *Res = GetValueAtEndOfBlockInternal(BB);
   return Res;
 }
 
-/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
-/// is live in the middle of the specified block.
-///
-/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
-/// important case: if there is a definition of the rewritten value after the
-/// 'use' in BB.  Consider code like this:
-///
-///      X1 = ...
-///   SomeBB:
-///      use(X)
-///      X2 = ...
-///      br Cond, SomeBB, OutBB
-///
-/// In this case, there are two values (X1 and X2) added to the AvailableVals
-/// set by the client of the rewriter, and those values are both live out of
-/// their respective blocks.  However, the use of X happens in the *middle* of
-/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
-/// merge the appropriate values, and this value isn't live out of the block.
-///
 Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
   // If there is no definition of the renamed variable in this block, just use
   // GetValueAtEndOfBlock to do our work.
@@ -203,8 +174,6 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
   return InsertedPHI;
 }
 
-/// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes,
-/// which use their value in the corresponding predecessor.
 void SSAUpdater::RewriteUse(Use &U) {
   Instruction *User = cast<Instruction>(U.getUser());
 
@@ -222,10 +191,6 @@ void SSAUpdater::RewriteUse(Use &U) {
   U.set(V);
 }
 
-/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse.  However,
-/// this version of the method can rewrite uses in the same block as a
-/// definition, because it assumes that all uses of a value are below any
-/// inserted values.
 void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
   Instruction *User = cast<Instruction>(U.getUser());
   
@@ -238,8 +203,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
   U.set(V);
 }
 
-/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
-/// specialized for SSAUpdater.
 namespace llvm {
 template<>
 class SSAUpdaterTraits<SSAUpdater> {
@@ -342,10 +305,9 @@ public:
 
 } // End llvm namespace
 
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it.  If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it.  If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
 Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
   if (Value *V = AvailableVals[BB])
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c..c4c1423 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -40,12 +40,14 @@
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/NoFolder.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <map>
 #include <set>
 using namespace llvm;
+using namespace PatternMatch;
 
 static cl::opt<unsigned>
 PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
@@ -59,6 +61,10 @@ static cl::opt<bool>
 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
        cl::desc("Sink common instructions down to the end block"));
 
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+       cl::desc("Hoist conditional stores if an unconditional store precedes"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
 STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -84,7 +90,6 @@ namespace {
 class SimplifyCFGOpt {
   const TargetTransformInfo &TTI;
   const DataLayout *const TD;
-
   Value *isValueEqualityComparison(TerminatorInst *TI);
   BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
                                std::vector<ValueEqualityComparisonCase> &Cases);
@@ -190,94 +195,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
     PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
 }
 
-
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
-/// least one PHI node in it), check to see if the merge at this block is due
-/// to an "if condition".  If so, return the boolean condition that determines
-/// which entry into BB will be taken.  Also, return by references the block
-/// that will be entered from if the condition is true, and the block that will
-/// be entered if the condition is false.
-///
-/// This does no checking to see if the true/false blocks have large or unsavory
-/// instructions in them.
-static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
-                             BasicBlock *&IfFalse) {
-  PHINode *SomePHI = cast<PHINode>(BB->begin());
-  assert(SomePHI->getNumIncomingValues() == 2 &&
-         "Function can only handle blocks with 2 predecessors!");
-  BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
-  BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
-
-  // We can only handle branches.  Other control flow will be lowered to
-  // branches if possible anyway.
-  BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
-  BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
-  if (Pred1Br == 0 || Pred2Br == 0)
-    return 0;
-
-  // Eliminate code duplication by ensuring that Pred1Br is conditional if
-  // either are.
-  if (Pred2Br->isConditional()) {
-    // If both branches are conditional, we don't have an "if statement".  In
-    // reality, we could transform this case, but since the condition will be
-    // required anyway, we stand no chance of eliminating it, so the xform is
-    // probably not profitable.
-    if (Pred1Br->isConditional())
-      return 0;
-
-    std::swap(Pred1, Pred2);
-    std::swap(Pred1Br, Pred2Br);
-  }
-
-  if (Pred1Br->isConditional()) {
-    // The only thing we have to watch out for here is to make sure that Pred2
-    // doesn't have incoming edges from other blocks.  If it does, the condition
-    // doesn't dominate BB.
-    if (Pred2->getSinglePredecessor() == 0)
-      return 0;
-
-    // If we found a conditional branch predecessor, make sure that it branches
-    // to BB and Pred2Br.  If it doesn't, this isn't an "if statement".
-    if (Pred1Br->getSuccessor(0) == BB &&
-        Pred1Br->getSuccessor(1) == Pred2) {
-      IfTrue = Pred1;
-      IfFalse = Pred2;
-    } else if (Pred1Br->getSuccessor(0) == Pred2 &&
-               Pred1Br->getSuccessor(1) == BB) {
-      IfTrue = Pred2;
-      IfFalse = Pred1;
-    } else {
-      // We know that one arm of the conditional goes to BB, so the other must
-      // go somewhere unrelated, and this must not be an "if statement".
-      return 0;
-    }
-
-    return Pred1Br->getCondition();
-  }
-
-  // Ok, if we got here, both predecessors end with an unconditional branch to
-  // BB.  Don't panic!  If both blocks only have a single (identical)
-  // predecessor, and THAT is a conditional branch, then we're all ok!
-  BasicBlock *CommonPred = Pred1->getSinglePredecessor();
-  if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
-    return 0;
-
-  // Otherwise, if this is a conditional branch, then we can use it!
-  BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
-  if (BI == 0) return 0;
-
-  assert(BI->isConditional() && "Two successors but not conditional?");
-  if (BI->getSuccessor(0) == Pred1) {
-    IfTrue = Pred1;
-    IfFalse = Pred2;
-  } else {
-    IfTrue = Pred2;
-    IfFalse = Pred1;
-  }
-  return BI->getCondition();
-}
-
-/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
 /// given instruction, which is assumed to be safe to speculate. 1 means
 /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
 static unsigned ComputeSpeculationCost(const User *I) {
@@ -428,7 +346,24 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
   // If this is an icmp against a constant, handle this as one of the cases.
   if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
     if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+      Value *RHSVal;
+      ConstantInt *RHSC;
+
       if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+        // (x & ~2^z) == y --> x == y || x == y|2^z
+        // This undoes a transformation done by instcombine to fuse 2 compares.
+        if (match(ICI->getOperand(0),
+                  m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+          APInt Not = ~RHSC->getValue();
+          if (Not.isPowerOf2()) {
+            Vals.push_back(C);
+            Vals.push_back(
+                ConstantInt::get(C->getContext(), C->getValue() | Not));
+            UsedICmps++;
+            return RHSVal;
+          }
+        }
+
         UsedICmps++;
         Vals.push_back(C);
         return I->getOperand(0);
@@ -439,6 +374,13 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
       ConstantRange Span =
         ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
 
+      // Shift the range if the compare is fed by an add. This is the range
+      // compare idiom as emitted by instcombine.
+      bool hasAdd =
+          match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)));
+      if (hasAdd)
+        Span = Span.subtract(RHSC->getValue());
+
       // If this is an and/!= check then we want to optimize "x ugt 2" into
       // x != 0 && x != 1.
       if (!isEQ)
@@ -451,7 +393,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
       for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
         Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
       UsedICmps++;
-      return I->getOperand(0);
+      return hasAdd ? RHSVal : I->getOperand(0);
     }
     return 0;
   }
@@ -529,9 +471,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
   } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
     if (BI->isConditional() && BI->getCondition()->hasOneUse())
       if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
-        if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
-             ICI->getPredicate() == ICmpInst::ICMP_NE) &&
-            GetConstantInt(ICI->getOperand(1), TD))
+        if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
           CV = ICI->getOperand(0);
 
   // Unwrap any lossless ptrtoint cast.
@@ -1079,9 +1019,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
       (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
     return false;
 
-  // If we get here, we can hoist at least one instruction.
   BasicBlock *BIParent = BI->getParent();
 
+  bool Changed = false;
   do {
     // If we are hoisting the terminator instruction, don't move one (making a
     // broken BB), instead clone it, and remove BI.
@@ -1096,6 +1036,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
       I2->replaceAllUsesWith(I1);
     I1->intersectOptionalDataWith(I2);
     I2->eraseFromParent();
+    Changed = true;
 
     I1 = BB1_Itr++;
     I2 = BB2_Itr++;
@@ -1115,7 +1056,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
 HoistTerminator:
   // It may not be possible to hoist an invoke.
   if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
-    return true;
+    return Changed;
+
+  for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+    PHINode *PN;
+    for (BasicBlock::iterator BBI = SI->begin();
+         (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+      Value *BB1V = PN->getIncomingValueForBlock(BB1);
+      Value *BB2V = PN->getIncomingValueForBlock(BB2);
+      if (BB1V == BB2V)
+        continue;
+
+      if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+        return Changed;
+      if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+        return Changed;
+    }
+  }
 
   // Okay, it is safe to hoist the terminator.
   Instruction *NT = I1->clone();
@@ -1332,6 +1289,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
   return Changed;
 }
 
+/// \brief Determine if we can hoist or sink a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... // No other stores or function calls (we could be calling a memory
+///     ... // function).
+///     %cmp = icmp ult %x, %y
+///     br i1 %cmp, label %EndBB, label %ThenBB
+///   ThenBB:
+///     store i32 %add5, i32* %arrayidx2
+///     br label %EndBB
+///   EndBB:
+///     ...
+///   We are going to transform this into:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... //
+///     %cmp = icmp ult %x, %y
+///     %add.add5 = select i1 %cmp, i32 %add, %add5
+///     store i32 %add.add5, i32* %arrayidx2
+///     ...
+///
+/// \return The value operand of the previous store to the same pointer found
+///         in BrBB if the store can be hoisted there. 0 otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
+  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+  if (!StoreToHoist)
+    return 0;
+
+  // Volatile or atomic.
+  if (!StoreToHoist->isSimple())
+    return 0;
+
+  Value *StorePtr = StoreToHoist->getPointerOperand();
+
+  // Look for a store to the same pointer in BrBB, walking backwards.
+  unsigned MaxNumInstToLookAt = 10; // Pre-decremented: at most 9 are scanned.
+  for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+       RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+    Instruction *CurI = &*RI;
+
+    // Could be calling a function that affects memory, like free().
+    if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+      return 0;
+
+    StoreInst *SI = dyn_cast<StoreInst>(CurI);
+    // Found a previous store; make sure it stores to the same location.
+    if (SI && SI->getPointerOperand() == StorePtr)
+      // Found the previous store, return its value operand.
+      return SI->getValueOperand();
+    else if (SI)
+      return 0; // Unknown store.
+  }
+
+  return 0;
+}
+
 /// \brief Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1412,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
   SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
 
   unsigned SpeculationCost = 0;
+  Value *SpeculatedStoreValue = 0;
+  StoreInst *SpeculatedStore = 0;
   for (BasicBlock::iterator BBI = ThenBB->begin(),
                             BBE = llvm::prior(ThenBB->end());
        BBI != BBE; ++BBI) {
@@ -1410,13 +1429,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
       return false;
 
     // Don't hoist the instruction if it's unsafe or expensive.
-    if (!isSafeToSpeculativelyExecute(I))
+    if (!isSafeToSpeculativelyExecute(I) &&
+        !(HoistCondStores &&
+          (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+                                                         EndBB))))
       return false;
-    if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+    if (!SpeculatedStoreValue &&
+        ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
       return false;
 
+    // Store the store speculation candidate.
+    if (SpeculatedStoreValue)
+      SpeculatedStore = cast<StoreInst>(I);
+
     // Do not hoist the instruction if any of its operands are defined but not
-    // used in this BB. The transformation will prevent the operand from
+    // used in BB. The transformation will prevent the operand from
     // being sunk into the use block.
     for (User::op_iterator i = I->op_begin(), e = I->op_end();
          i != e; ++i) {
@@ -1448,18 +1475,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
     Value *OrigV = PN->getIncomingValueForBlock(BB);
     Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
 
+    // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
     // Skip PHIs which are trivial.
     if (ThenV == OrigV)
       continue;
 
     HaveRewritablePHIs = true;
-    ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
-    if (!CE)
+    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+    if (!OrigCE && !ThenCE)
       continue; // Known safe and cheap.
 
-    if (!isSafeToSpeculativelyExecute(CE))
+    if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+        (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
       return false;
-    if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+    unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
+    unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+    if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
       return false;
 
     // Account for the cost of an unfolded ConstantExpr which could end up
@@ -1473,12 +1505,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 
   // If there are no PHIs to process, bail early. This helps ensure idempotence
   // as well.
-  if (!HaveRewritablePHIs)
+  if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
     return false;
 
   // If we get here, we can hoist the instruction and if-convert.
   DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
+  // Insert a select of the value of the speculated store.
+  if (SpeculatedStoreValue) {
+    IRBuilder<true, NoFolder> Builder(BI);
+    Value *TrueV = SpeculatedStore->getValueOperand();
+    Value *FalseV = SpeculatedStoreValue;
+    if (Invert)
+      std::swap(TrueV, FalseV);
+    Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+                                    "." + FalseV->getName());
+    SpeculatedStore->setOperand(0, S);
+  }
+
   // Hoist the instructions.
   BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
                            llvm::prior(ThenBB->end()));
@@ -3073,7 +3117,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   Value *Sub = SI->getCondition();
   if (!Offset->isNullValue())
     Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
-  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+  Value *Cmp;
+  // If NumCases overflowed, then all possible values jump to the successor.
+  if (NumCases->isNullValue() && SI->getNumCases() != 0)
+    Cmp = ConstantInt::getTrue(SI->getContext());
+  else
+    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
   BranchInst *NewBI = Builder.CreateCondBr(
       Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
 
@@ -3216,7 +3265,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
        E = ForwardingNodes.end(); I != E; ++I) {
     PHINode *Phi = I->first;
-    SmallVector<int,4> &Indexes = I->second;
+    SmallVectorImpl<int> &Indexes = I->second;
 
     if (Indexes.size() < 2) continue;
 
@@ -3301,11 +3350,12 @@ static Constant *ConstantFold(Instruction *I,
 /// at the common destination basic block, *CommonDest, for one of the case
 /// destionations CaseDest corresponding to value CaseVal (0 for the default
 /// case), of a switch instruction SI.
-static bool GetCaseResults(SwitchInst *SI,
-                           ConstantInt *CaseVal,
-                           BasicBlock *CaseDest,
-                           BasicBlock **CommonDest,
-                           SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+static bool
+GetCaseResults(SwitchInst *SI,
+               ConstantInt *CaseVal,
+               BasicBlock *CaseDest,
+               BasicBlock **CommonDest,
+               SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) {
   // The block from which we enter the common destination.
   BasicBlock *Pred = SI->getParent();
 
@@ -3378,7 +3428,7 @@ namespace {
     SwitchLookupTable(Module &M,
                       uint64_t TableSize,
                       ConstantInt *Offset,
-               const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+             const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
                       Constant *DefaultValue,
                       const DataLayout *TD);
 
@@ -3425,7 +3475,7 @@ namespace {
 SwitchLookupTable::SwitchLookupTable(Module &M,
                                      uint64_t TableSize,
                                      ConstantInt *Offset,
-               const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+             const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
                                      Constant *DefaultValue,
                                      const DataLayout *TD)
     : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
@@ -3552,7 +3602,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
 }
 
 /// ShouldBuildLookupTable - Determine whether a lookup table should be built
-/// for this switch, based on the number of caes, size of the table and the
+/// for this switch, based on the number of cases, size of the table and the
 /// types of the results.
 static bool ShouldBuildLookupTable(SwitchInst *SI,
                                    uint64_t TableSize,
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 41c207c..bf3442a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -119,7 +119,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
         return 0;
 
       D = ConstantInt::get(UseInst->getContext(),
-                           APInt(BitWidth, 1).shl(D->getZExtValue()));
+                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
     }
     FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
   }
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704..094c201 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
       return 0;
 
+    // Do not do any of the following transformations if the fprintf return
+    // value is used, in general the fprintf return value is not compatible
+    // with fwrite(), fputc() or fputs().
+    if (!CI->use_empty())
+      return 0;
+
     // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
     if (CI->getNumArgOperands() == 2) {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
       // These optimizations require DataLayout.
       if (!TD) return 0;
 
-      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
-                                ConstantInt::get(TD->getIntPtrType(*Context),
-                                                 FormatStr.size()),
-                                CI->getArgOperand(0), B, TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+      return EmitFWrite(CI->getArgOperand(1),
+                        ConstantInt::get(TD->getIntPtrType(*Context),
+                                         FormatStr.size()),
+                        CI->getArgOperand(0), B, TD, TLI);
     }
 
     // The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (FormatStr[1] == 'c') {
       // fprintf(F, "%c", chr) --> fputc(chr, F)
       if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
-                               TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+      return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
 
     if (FormatStr[1] == 's') {
       // fprintf(F, "%s", str) --> fputs(str, F)
-      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+      if (!CI->getArgOperand(2)->getType()->isPointerTy())
         return 0;
       return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
@@ -1937,7 +1940,7 @@ LibCallSimplifier::~LibCallSimplifier() {
 }
 
 Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
-  if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
+  if (CI->isNoBuiltin()) return 0;
   return Impl->optimizeCall(CI);
 }
 
@@ -1947,3 +1950,53 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
 }
 
 }
+
+// TODO:
+//   Additional cases that we need to add to this file:
+//
+// cbrt:
+//   * cbrt(expN(x))  -> expN(x/3)
+//   * cbrt(sqrt(x))  -> pow(x,1/6)
+//   * cbrt(cbrt(x))  -> pow(x,1/9)
+//
+// exp, expf, expl:
+//   * exp(log(x))  -> x
+//
+// log, logf, logl:
+//   * log(exp(x))   -> x
+//   * log(x**y)     -> y*log(x)
+//   * log(exp(y))   -> y*log(e)
+//   * log(exp2(y))  -> y*log(2)
+//   * log(exp10(y)) -> y*log(10)
+//   * log(sqrt(x))  -> 0.5*log(x)
+//   * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+//   * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+//   * pow(exp(x),y)  -> exp(x*y)
+//   * pow(sqrt(x),y) -> pow(x,y*0.5)
+//   * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+//   * round(cnst) -> cnst'
+//
+// signbit:
+//   * signbit(cnst) -> cnst'
+//   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+//   * sqrt(expN(x))  -> expN(x*0.5)
+//   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+//   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// strchr:
+//   * strchr(p, 0) -> p + strlen(p)
+// tan, tanf, tanl:
+//   * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+//   * trunc(cnst) -> cnst'
+//
+//
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
new file mode 100644
index 0000000..b98cb5b
--- /dev/null
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -0,0 +1,225 @@
+//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables, or to instrument some functions or global variables in a specific
+// way, based on a user-supplied list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+/// Represents a set of regular expressions.  Regular expressions which are
+/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
+/// others are represented as a single pipe-separated regex in RegEx.  The
+/// reason for doing so is efficiency; StringSet is much faster at matching
+/// literal strings than Regex.
+struct SpecialCaseList::Entry {
+  StringSet<> Strings;
+  Regex *RegEx;
+
+  Entry() : RegEx(0) {}
+
+  bool match(StringRef Query) const {
+    return Strings.count(Query) || (RegEx && RegEx->match(Query));
+  }
+};
+
+SpecialCaseList::SpecialCaseList(const StringRef Path) {
+  // Validate and open blacklist file.
+  if (Path.empty()) return;
+  OwningPtr<MemoryBuffer> File;
+  if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+    report_fatal_error("Can't open blacklist file: " + Path + ": " +
+                       EC.message());
+  }
+
+  init(File.get());
+}
+
+SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) {
+  init(MB);
+}
+
+void SpecialCaseList::init(const MemoryBuffer *MB) {
+  // Iterate through each line in the blacklist file.
+  SmallVector<StringRef, 16> Lines;
+  SplitString(MB->getBuffer(), Lines, "\n\r");
+  StringMap<StringMap<std::string> > Regexps;
+  for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
+       I != E; ++I) {
+    // Ignore empty lines and lines starting with "#"
+    if (I->empty() || I->startswith("#"))
+      continue;
+    // Get our prefix and unparsed regexp.
+    std::pair<StringRef, StringRef> SplitLine = I->split(":");
+    StringRef Prefix = SplitLine.first;
+    if (SplitLine.second.empty()) {
+      // Missing ':' in the line, or nothing follows it.
+      report_fatal_error("malformed blacklist line: " + SplitLine.first);
+    }
+
+    std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
+    std::string Regexp = SplitRegexp.first;
+    StringRef Category = SplitRegexp.second;
+
+    // Backwards compatibility.
+    if (Prefix == "global-init") {
+      Prefix = "global";
+      Category = "init";
+    } else if (Prefix == "global-init-type") {
+      Prefix = "type";
+      Category = "init";
+    } else if (Prefix == "global-init-src") {
+      Prefix = "src";
+      Category = "init";
+    }
+
+    // See if we can store Regexp in Strings.
+    if (Regex::isLiteralERE(Regexp)) {
+      Entries[Prefix][Category].Strings.insert(Regexp);
+      continue;
+    }
+
+    // Replace * with .*
+    for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+         pos += strlen(".*")) {
+      Regexp.replace(pos, strlen("*"), ".*");
+    }
+
+    // Check that the regexp is valid.
+    Regex CheckRE(Regexp);
+    std::string Error;
+    if (!CheckRE.isValid(Error)) {
+      report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+          ": " + Error);
+    }
+
+    // Add this regexp into the proper group by its prefix.
+    if (!Regexps[Prefix][Category].empty())
+      Regexps[Prefix][Category] += "|";
+    Regexps[Prefix][Category] += "^" + Regexp + "$";
+  }
+
+  // Iterate through each prefix and category, and create a Regex for each.
+  for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
+                                                          E = Regexps.end();
+       I != E; ++I) {
+    for (StringMap<std::string>::const_iterator II = I->second.begin(),
+                                                IE = I->second.end();
+         II != IE; ++II) {
+      Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
+    }
+  }
+}
+
+SpecialCaseList::~SpecialCaseList() {
+  for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+                                              E = Entries.end();
+       I != E; ++I) {
+    for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                          IE = I->second.end();
+         II != IE; ++II) {
+      delete II->second.RegEx;
+    }
+  }
+}
+
+bool SpecialCaseList::findCategory(const Function &F,
+                                   StringRef &Category) const {
+  return findCategory(*F.getParent(), Category) ||
+         findCategory("fun", F.getName(), Category);
+}
+
+bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
+  return isIn(*F.getParent(), Category) ||
+         inSectionCategory("fun", F.getName(), Category);
+}
+
+static StringRef GetGVTypeString(const GlobalVariable &G) {
+  // Types of GlobalVariables are always pointer types.
+  Type *GType = G.getType()->getElementType();
+  // For now we support blacklisting struct types only.
+  if (StructType *SGType = dyn_cast<StructType>(GType)) {
+    if (!SGType->isLiteral())
+      return SGType->getName();
+  }
+  return "<unknown type>";
+}
+
+bool SpecialCaseList::findCategory(const GlobalVariable &G,
+                                   StringRef &Category) const {
+  return findCategory(*G.getParent(), Category) ||
+         findCategory("global", G.getName(), Category) ||
+         findCategory("type", GetGVTypeString(G), Category);
+}
+
+bool SpecialCaseList::isIn(const GlobalVariable &G,
+                           const StringRef Category) const {
+  return isIn(*G.getParent(), Category) ||
+         inSectionCategory("global", G.getName(), Category) ||
+         inSectionCategory("type", GetGVTypeString(G), Category);
+}
+
+bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const {
+  return findCategory("src", M.getModuleIdentifier(), Category);
+}
+
+bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
+  return inSectionCategory("src", M.getModuleIdentifier(), Category);
+}
+
+bool SpecialCaseList::findCategory(const StringRef Section,
+                                   const StringRef Query,
+                                   StringRef &Category) const {
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
+  if (I == Entries.end()) return false;
+
+  for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                        IE = I->second.end();
+       II != IE; ++II) {
+    if (II->getValue().match(Query)) {
+      Category = II->first();
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool SpecialCaseList::inSectionCategory(const StringRef Section,
+                                        const StringRef Query,
+                                        const StringRef Category) const {
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
+  if (I == Entries.end()) return false;
+  StringMap<Entry>::const_iterator II = I->second.find(Category);
+  if (II == I->second.end()) return false;
+
+  return II->getValue().match(Query);
+}
+
+}  // namespace llvm
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 5812d46..c3df215 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..457fc80 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -22,14 +22,22 @@ using namespace llvm;
 
 // Out of line method to get vtable etc for class.
 void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
 
 Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
-                      ValueMapTypeRemapper *TypeMapper) {
+                      ValueMapTypeRemapper *TypeMapper,
+                      ValueMaterializer *Materializer) {
   ValueToValueMapTy::iterator I = VM.find(V);
   
   // If the value already exists in the map, use it.
   if (I != VM.end() && I->second) return I->second;
   
+  // If we have a materializer and it can materialize a value, use that.
+  if (Materializer) {
+    if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
+      return VM[V] = NewV;
+  }
+
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
   if (isa<GlobalValue>(V) || isa<MDString>(V))
@@ -57,14 +65,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
       return VM[V] = const_cast<Value*>(V);
     
     // Create a dummy node in case we have a metadata cycle.
-    MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+    MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
     VM[V] = Dummy;
     
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
       if (OP == 0) continue;
-      Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+      Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
       // Use identity map if Mapped_Op is null and we can ignore missing
       // entries.
       if (Mapped_OP == OP ||
@@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
         if (Op == 0)
           Elts.push_back(0);
         else {
-          Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+          Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
           // Use identity map if Mapped_Op is null and we can ignore missing
           // entries.
           if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
@@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
   
   if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
     Function *F = 
-      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
     BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
-                                                       Flags, TypeMapper));
+                                                       Flags, TypeMapper, Materializer));
     return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
   }
   
@@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
   Value *Mapped = 0;
   for (; OpNo != NumOperands; ++OpNo) {
     Value *Op = C->getOperand(OpNo);
-    Mapped = MapValue(Op, VM, Flags, TypeMapper);
+    Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
     if (Mapped != C) break;
   }
   
@@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
     // Map the rest of the operands that aren't processed yet.
     for (++OpNo; OpNo != NumOperands; ++OpNo)
       Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
-                             Flags, TypeMapper));
+                             Flags, TypeMapper, Materializer));
   }
   
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
 /// current values into those specified by VMap.
 ///
 void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
-                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+                            ValueMaterializer *Materializer){
   // Remap operands.
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+    Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
     // If we aren't ignoring missing entries, assert that something happened.
     if (V != 0)
       *op = V;
@@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
   for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
        MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
     MDNode *Old = MI->second;
-    MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+    MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
     if (New != Old)
       I->setMetadata(MI->first, New);
   }