Diffstat (limited to 'lib/CodeGen')
104 files changed, 7357 insertions, 4887 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index b520d8f..c23351b 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), State(NULL) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ @@ -357,7 +359,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just - // after the def. A dead def can occur because the def is truely + // after the def. A dead def can occur because the def is truly // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. @@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const TargetRegisterClass *SuperRC = TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); - const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); - const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); - if (RB == RE) { + ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC); + if (Order.empty()) { DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); return false; } @@ -628,17 +629,17 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG(dbgs() << "\tFind Registers:"); if (RenameOrder.count(SuperRC) == 0) - RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); - const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; - const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); - TargetRegisterClass::iterator R = OrigR; + unsigned OrigR = RenameOrder[SuperRC]; + unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); + unsigned R = OrigR; do { - if (R == RB) R = RE; + if (R == 0) R = Order.size(); --R; - const unsigned NewSuperReg = *R; + const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewSuperReg)) continue; + if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. 
if (NewSuperReg == SuperReg) continue; @@ -719,7 +720,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex) { + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& @@ -817,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) { + if (!RegClassInfo.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; @@ -923,14 +926,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // sure to update that as well. const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; if (!SU) continue; - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { - MachineInstr *DI = SU->DbgInstrList[i]; - assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() - && "Non register dbg_value attached to SUnit!"); - if (DI->getOperand(0).getReg() == AntiDepReg) - DI->getOperand(0).setReg(NewReg); - } + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second.Operand->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 9d715cc..e43fe65 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -30,6 +30,8 @@ #include <map> namespace llvm { +class RegisterClassInfo; + /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { @@ -117,11 +119,7 @@ namespace llvm { MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - - /// AllocatableSet - The set of allocatable registers. - /// We'll be ignoring anti-dependencies on non-allocatable registers, - /// because they may not be safe to break. - const BitVector AllocatableSet; + const RegisterClassInfo &RegClassInfo; /// CriticalPathSet - The set of registers that should only be /// renamed if they are on the critical path. @@ -133,6 +131,7 @@ namespace llvm { public: AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); @@ -146,7 +145,8 @@ namespace llvm { unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex); + unsigned InsertPosIndex, + DbgValueVector &DbgValues); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -157,8 +157,8 @@ namespace llvm { void FinishBlock(); private: - typedef std::map<const TargetRegisterClass *, - TargetRegisterClass::const_iterator> RenameOrderType; + /// Keep track of a position in the allocation order for each regclass. 
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; /// IsImplicitDefUse - Return true if MO represents a register /// that is both implicitly used and defined in MI diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 20c7625..1005f10 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "AllocationOrder.h" +#include "RegisterClassInfo.h" #include "VirtRegMap.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,8 +24,8 @@ using namespace llvm; // Compare VirtRegMap::getRegAllocPref(). AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const BitVector &ReservedRegs) - : Pos(0), Reserved(ReservedRegs) { + const RegisterClassInfo &RegClassInfo) + : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) { const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); std::pair<unsigned, unsigned> HintPair = VRM.getRegInfo().getRegAllocationHint(VirtReg); @@ -36,33 +37,43 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (TargetRegisterInfo::isVirtualRegister(Hint)) Hint = VRM.getPhys(Hint); - // The remaining allocation order may depend on the hint. - tie(Begin, End) = VRM.getTargetRegInfo() - .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); + // The first hint pair component indicates a target-specific hint. + if (HintPair.first) { + const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); + // The remaining allocation order may depend on the hint. + ArrayRef<unsigned> Order = + TRI.getRawAllocationOrder(RC, HintPair.first, Hint, + VRM.getMachineFunction()); + if (Order.empty()) + return; - // Target-dependent hints require resolution. - if (HintPair.first) - Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint, - VRM.getMachineFunction()); + // Copy the allocation order with reserved registers removed. + OwnedBegin = true; + unsigned *P = new unsigned[Order.size()]; + Begin = P; + for (unsigned i = 0; i != Order.size(); ++i) + if (!RCI.isReserved(Order[i])) + *P++ = Order[i]; + End = P; + + // Target-dependent hints require resolution. + Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint, + VRM.getMachineFunction()); + } else { + // If there is no hint or just a normal hint, use the cached allocation + // order from RegisterClassInfo. + ArrayRef<unsigned> O = RCI.getOrder(RC); + Begin = O.begin(); + End = O.end(); + } // The hint must be a valid physreg for allocation. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || ReservedRegs.test(Hint))) + !RC->contains(Hint) || RCI.isReserved(Hint))) Hint = 0; } -unsigned AllocationOrder::next() { - // First take the hint. - if (!Pos) { - Pos = Begin; - if (Hint) - return Hint; - } - // Then look at the order from TRI. 
- while(Pos != End) { - unsigned Reg = *Pos++; - if (Reg != Hint && !Reserved.test(Reg)) - return Reg; - } - return 0; +AllocationOrder::~AllocationOrder() { + if (OwnedBegin) + delete [] Begin; } diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 61fd8f8..d1e48a1 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -19,15 +19,16 @@ namespace llvm { -class BitVector; +class RegisterClassInfo; class VirtRegMap; class AllocationOrder { const unsigned *Begin; const unsigned *End; const unsigned *Pos; - const BitVector &Reserved; + const RegisterClassInfo &RCI; unsigned Hint; + bool OwnedBegin; public: /// AllocationOrder - Create a new AllocationOrder for VirtReg. @@ -37,12 +38,28 @@ public: /// TargetRegisterInfo::getReservedRegs(). AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const BitVector &ReservedRegs); + const RegisterClassInfo &RegClassInfo); + + ~AllocationOrder(); /// next - Return the next physical register in the allocation order, or 0. /// It is safe to call next again after it returned 0. /// It will keep returning 0 until rewind() is called. - unsigned next(); + unsigned next() { + // First take the hint. + if (!Pos) { + Pos = Begin; + if (Hint) + return Hint; + } + // Then look at the order from TRI. + while (Pos != End) { + unsigned Reg = *Pos++; + if (Reg != Hint) + return Reg; + } + return 0; + } /// rewind - Start over from the beginning. void rewind() { Pos = 0; } diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 086b757..df47f98 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -30,6 +30,9 @@ namespace llvm { /// anti-dependencies. class AntiDepBreaker { public: + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + virtual ~AntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. @@ -40,9 +43,10 @@ public: /// the number of anti-dependencies broken. /// virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned InsertPosIndex) =0; + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) = 0; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -52,6 +56,14 @@ public: /// Finish - Finish anti-dep breaking for a basic block. virtual void FinishBlock() =0; + + /// UpdateDbgValue - Update DBG_VALUE if the dependency breaker is updating + /// another machine instruction to use NewReg. + void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) { + assert(MI && MI->isDebugValue() && "MI is not DBG_VALUE!"); + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg) + MI->getOperand(0).setReg(NewReg); + } }; } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 0db28a6..5861fa4 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -52,7 +52,7 @@ void ARMException::EndModule() { /// being emitted immediately after the function entry point.
void ARMException::BeginFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitFnStart(); - if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); } @@ -60,7 +60,7 @@ void ARMException::BeginFunction(const MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. /// void ARMException::EndFunction() { - if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory) + if (!Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 61f5672..b544ff1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -35,10 +35,12 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" @@ -191,22 +193,25 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) DD = new DwarfDebug(this, &M); - if (MAI->doesSupportExceptionHandling()) - switch (MAI->getExceptionHandlingType()) { - default: - case ExceptionHandling::DwarfTable: - DE = new DwarfTableException(this); - break; - case ExceptionHandling::DwarfCFI: - DE = new DwarfCFIException(this); - break; - case ExceptionHandling::ARM: - DE = new ARMException(this); - break; - } + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + return false; + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + DE = new DwarfCFIException(this); + return false; + case ExceptionHandling::ARM: + DE = new ARMException(this); + return false; + case ExceptionHandling::Win64: + DE = new Win64Exception(this); + return false; + } +#else + return false; #endif // ANDROID_TARGET_BUILD - return false; + llvm_unreachable("Unknown exception type."); } void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { @@ -271,7 +276,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = Mang->getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility()); + EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -293,12 +298,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isCommon() || GVKind.isBSSLocal()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } - // Handle common symbols. if (GVKind.isCommon()) { unsigned Align = 1 << AlignLog; @@ -492,39 +491,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. 
- DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << "[ "; - EmitDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } -} - /// EmitComments - Pretty-print comments for instructions. static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); - DebugLoc DL = MI.getDebugLoc(); - if (!DL.isUnknown()) { // Print source line info. - EmitDebugLoc(DL, MF, CommentOS); - CommentOS << '\n'; - } - // Check for spills and reloads int FI; @@ -631,6 +602,45 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { return true; } +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && + MF->getFunction()->needsUnwindTableEntry()) + return CFI_M_EH; + + if (MMI->hasDebugInfo()) + return CFI_M_Debug; + + return CFI_M_None; +} + +bool AsmPrinter::needsSEHMoves() { + return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 && + MF->getFunction()->needsUnwindTableEntry(); +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (needsCFIMoves() == CFI_M_None) + return; + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector<MachineMove>::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { @@ -669,6 +679,9 @@ void AsmPrinter::EmitFunctionBody() { switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -689,6 +702,9 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) EmitKill(II, *this); break; default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + EmitInstruction(II); break; } @@ -767,6 +783,53 @@ getDebugValueLocation(const MachineInstr *MI) const { return MachineLocation(); } +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + + for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg()); + *SR && Reg < 0; ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + // FIXME: Get the bit range this register uses of the superregister + // so that we can produce a DW_OP_bit_piece + } + + // FIXME: Handle cases like a super register being encoded as + // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33 + + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of a dwarf expression.
We should + // probably assert that Reg >= 0 once debug info generation is more mature. + + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } + + // FIXME: Produce a DW_OP_bit_piece if we used a superregister +} + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -1879,7 +1942,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return false; // The predecessor has to be immediately before this block. - const MachineBasicBlock *Pred = *PI; + MachineBasicBlock *Pred = *PI; if (!Pred->isLayoutSuccessor(MBB)) return false; @@ -1888,9 +1951,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { if (Pred->empty()) return true; - // Otherwise, check the last instruction. - const MachineInstr &LastInst = Pred->back(); - return !LastInst.getDesc().isBarrier(); + // Check the terminators in the previous blocks + for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(), + IE = Pred->end(); II != IE; ++II) { + MachineInstr &MI = *II; + + // If it is not a simple branch, we are in a table somewhere. + if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + return false; + + // If we are the operands of one of the branches, this is not + // a fall through. + for (MachineInstr::mop_iterator OI = MI.operands_begin(), + OE = MI.operands_end(); OI != OE; ++OI) { + const MachineOperand& OP = *OI; + if (OP.isJTI()) + return false; + if (OP.isMBB() && OP.getMBB() == MBB) + return false; + } + } + + return true; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 9c8184a..dd5b0e2 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } @@ -206,108 +206,28 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// - -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. 
-void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, - MCSymbol *BaseLabel, bool isEH) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameLowering()->getStackGrowthDirection() != - TargetFrameLowering::StackGrowsUp) - stackGrowth *= -1; - - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // Advance row if new location. - if (BaseLabel && Label) { - MCSymbol *ThisSym = Label; - if (ThisSym != BaseLabel) { - EmitCFAByte(dwarf::DW_CFA_advance_loc4); - EmitLabelDifference(ThisSym, BaseLabel, 4); - BaseLabel = ThisSym; - } - } - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); - - if (Src.getReg() == MachineLocation::VirtualFP) { - EmitCFAByte(dwarf::DW_CFA_def_cfa_offset); - } else { - EmitCFAByte(dwarf::DW_CFA_def_cfa); - EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register"); - } - - EmitULEB128(-Src.getOffset(), "Offset"); - continue; - } - - if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - EmitCFAByte(dwarf::DW_CFA_def_cfa_register); - EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register"); - continue; - } - - unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); - int Offset = Dst.getOffset() / stackGrowth; - - if (Offset < 0) { - EmitCFAByte(dwarf::DW_CFA_offset_extended_sf); - EmitULEB128(Reg, "Reg"); - EmitSLEB128(Offset, "Offset"); - } else if (Reg < 64) { - EmitCFAByte(dwarf::DW_CFA_offset + Reg); - EmitULEB128(Offset, "Offset"); - } else { - EmitCFAByte(dwarf::DW_CFA_offset_extended); - EmitULEB128(Reg, "Reg"); - EmitULEB128(Offset, "Offset"); - } - } -} - -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. -void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { +/// EmitCFIFrameMove - Emit a frame instruction. +void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - assert("Machine move not supported yet"); - // Reg + Offset - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + // If advancing cfa. 
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); + // Reg + Offset + OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), + Src.getOffset()); } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 1377e4d..4da7876 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -5,9 +5,10 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterInlineAsm.cpp DIE.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp - DwarfTableException.cpp OcamlGCPrinter.cpp + Win64Exception.cpp ) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 68be2ee..91b7d08 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -40,92 +41,105 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) - {} + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} /// EndModule - Emit all exception information that should come after the /// content. void DwarfCFIException::EndModule() { - if (!Asm->MAI->isExceptionHandlingDwarf()) - return; + if (moveTypeModule == AsmPrinter::CFI_M_Debug) + Asm->OutStreamer.EmitCFISections(false, true); - if (!shouldEmitTableModule) + if (!Asm->MAI->isExceptionHandlingDwarf()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; // Emit references to all used personality functions + bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); - Asm->EmitReference(Personalities[i], PerEncoding); + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. This lets us produce bit identical outputs while + // transitioning to CFI. 
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); } } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfCFIException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MMI->getLandingPads().empty(); // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + if (MoveType == AsmPrinter::CFI_M_EH || + (MoveType == AsmPrinter::CFI_M_Debug && + moveTypeModule == AsmPrinter::CFI_M_None)) + moveTypeModule = MoveType; - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; - shouldEmitTableModule |= shouldEmitTable; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - if (shouldEmitMoves) { - const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); - Asm->OutStreamer.EmitCFIStartProc(); + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - TFL->getInitialFrameState(Moves); - Asm->EmitCFIFrameMoves(Moves); - Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); - } + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; - if (!shouldEmitTable) + if (!shouldEmitPersonality && !shouldEmitMoves) return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) + return; + + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // Provide LSDA information. - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - if (LSDAEncoding != dwarf::DW_EH_PE_omit) - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + if (!shouldEmitLSDA) + return; - // Indicate personality routine, if any. - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if (PerEncoding != dwarf::DW_EH_PE_omit && - MMI->getPersonalities()[MMI->getPersonalityIndex()]) - Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", - MMI->getPersonalityIndex()), - PerEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); } /// EndFunction - Gather and emit post-function exception information. 
/// void DwarfCFIException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (shouldEmitMoves) - Asm->OutStreamer.EmitCFIEndProc(); + Asm->OutStreamer.EmitCFIEndProc(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -133,6 +147,6 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitTable) + if (shouldEmitPersonality) EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..bff1a35 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,1054 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) + : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. +CompileUnit::~CompileUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void CompileUnit::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add a signed integer attribute data and value. +/// +void CompileUnit::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value.
+/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value. +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), + G.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. 
+ if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location) { + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addRegisterOp - Add register operand. +void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. +void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void CompileUnit::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV->getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (;i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. 
+/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addConstantValue - Add constant value entry in variable DIE. 
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, + DIType Ty) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned form = dwarf::DW_FORM_udata; + switch (Ty.getSizeInBits()) { + case 8: form = dwarf::DW_FORM_data1; break; + case 16: form = dwarf::DW_FORM_data2; break; + case 32: form = dwarf::DW_FORM_data4; break; + case 64: form = dwarf::DW_FORM_data8; break; + default: break; + } + + DIBasicType BTy(Ty); + if (BTy.Verify() && + (BTy.getEncoding() == dwarf::DW_ATE_signed + || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + addSInt(Block, 0, form, MO.getImm()); + else + addUInt(Block, 0, form, MO.getImm()); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + unsigned CIBitWidth = CI->getBitWidth(); + if (CIBitWidth <= 64) { + unsigned form = 0; + switch (CIBitWidth) { + case 8: form = dwarf::DW_FORM_data1; break; + case 16: form = dwarf::DW_FORM_data2; break; + case 32: form = dwarf::DW_FORM_data4; break; + case 64: form = dwarf::DW_FORM_data8; break; + default: + form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + } + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. 
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + +} +/// addToContextOwner - Add Die into the list of its context owner's children. +void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void CompileUnit::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + DIEEntry *Entry = getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + insertDIEEntry(Ty, Entry); + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + + // If this is a complete composite type then include it in the + // list of global types. + addGlobalType(Ty); +} + +/// addGlobalType - Add a new global type to the compile unit. +/// +void CompileUnit::addGlobalType(DIType Ty) { + DIDescriptor Context = Ty.getContext(); + if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() + && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) + GlobalTypes[Ty.getName()] = Entry->getEntry(); +} + +/// addPubTypes - Add type for pubtypes section. +void CompileUnit::addPubTypes(DISubprogram SP) { + DICompositeType SPTy = SP.getType(); + unsigned SPTag = SPTy.getTag(); + if (SPTag != dwarf::DW_TAG_subroutine_type) + return; + + DIArray Args = SPTy.getTypeArray(); + for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { + DIType ATy(Args.getElement(i)); + if (!ATy.Verify()) + continue; + addGlobalType(ATy); + } +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. 
+ StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. 
+ for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (CTy.isObjcClassComplete()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, + dwarf::DW_FORM_flag, 1); + + if (Tag == dwarf::DW_TAG_class_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + } +} + +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + DIE *ParamDIE = getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. 
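///
/// (Editorial sketch, not part of the patch: for a hypothetical
/// `template <typename T, int N> class Foo;` instantiated as `Foo<int, 4>`,
/// addTemplateParams would attach two children to the class DIE via these
/// helpers:
///   DW_TAG_template_type_parameter:  DW_AT_name "T", DW_AT_type -> int
///   DW_TAG_template_value_parameter: DW_AT_name "N", DW_AT_type -> int,
///                                    DW_AT_const_value 4 (DW_FORM_udata)
/// matching the tags, attributes, and forms emitted by the two helpers.)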
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+ // The L value defines the lower bound, which is typically zero for C/C++. The
+ // H value is the upper bound. Values are 64 bit. H - L + 1 is the size
+ // of the array. If L > H then do not emit the DW_AT_lower_bound and
+ // DW_AT_upper_bound attributes. If L and H are both zero, the array has one
+ // element, and in that case do not emit a lower bound.
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
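// Editorial sketch (not part of the patch): the bound-emission rule from
// constructSubrangeDIE above, isolated and exercised on two assumed
// descriptors. All names here are hypothetical; only the L/H logic mirrors
// the code.
#include <cstdint>
#include <cstdio>

struct Bounds { bool EmitLower; bool EmitUpper; };

static Bounds boundsFor(int64_t L, int64_t H) {
  Bounds B;
  B.EmitUpper = (L <= H);              // L > H means the element count is unknown.
  B.EmitLower = (L <= H) && (L != 0);  // A zero lower bound is implied for C/C++.
  return B;
}

int main() {
  Bounds A = boundsFor(0, 3);   // e.g. `int A[4]` described as Lo = 0, Hi = 3.
  Bounds B = boundsFor(0, -1);  // e.g. an incomplete `int B[]` with Lo > Hi.
  std::printf("A: lower=%d upper=%d\n", A.EmitLower, A.EmitUpper); // 0 1
  std::printf("B: lower=%d upper=%d\n", B.EmitLower, B.EmitUpper); // 0 0
  return 0;
}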
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the following
+ // expression to extract the appropriate offset from the vtable.
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
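// Editorial sketch (not part of the patch): the bitfield arithmetic above on
// one concrete field. Assume `struct S { unsigned a : 3; unsigned b : 5; };`
// on a little-endian target, describing `b`: OffsetInBits = 3, SizeInBits = 5,
// and OriginalTypeSize = AlignInBits = 32 (all assumed front-end values).
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Size = 5, FieldSize = 32, Offset = 3;
  uint64_t AlignMask = ~(uint64_t(32) - 1);           // clear the low 5 bits
  uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 32: end of storage unit
  uint64_t FieldOffset = HiMark - FieldSize;          // 0: start of storage unit
  Offset -= FieldOffset;                              // 3: bits from unit start
  Offset = FieldSize - (Offset + Size);               // 24: DWARF2 counts from MSB
  std::printf("DW_AT_bit_offset = %llu\n", (unsigned long long)Offset);
  std::printf("DW_AT_data_member_location = %llu\n",
              (unsigned long long)(FieldOffset >> 3)); // byte offset 0
  return 0;
}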
+ // Objective-C properties.
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..60a9b28
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,280 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
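  /// (Editorial note, not part of the patch: the two maps above implement the
  /// unit-level memoization used by getOrCreateTypeDIE and addType in the
  /// .cpp file -- roughly:
  ///   if (DIE *TyDIE = getDIE(Ty)) return TyDIE;   // MDNodeToDieMap hit
  ///   ... otherwise create TyDIE and insertDIE(Ty, TyDIE) ...
  /// while MDNodeToDIEEntryMap caches the DIEEntry proxy so that every later
  /// DW_AT_type reference to the same type reuses one DW_FORM_ref4 value.)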
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
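  /// (Editorial note, not part of the patch: in the DwarfDebug implementation
  /// this patch migrates, registers numbered below 32 are encoded with the
  /// compact DW_OP_reg0+Reg / DW_OP_breg0+Reg opcodes, higher numbers fall
  /// back to DW_OP_regx / DW_OP_bregx with a ULEB128 register operand, and a
  /// variable addressed off the frame register becomes DW_OP_fbreg with an
  /// SLEB128 offset; e.g. a variable at [reg5 + 8] yields the location block
  /// { DW_OP_breg5, SLEB128(8) }.)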
+ void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + /// + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + + /// addType - Add a new type attribute to the specified entity. + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); + + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, + DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(DIE &Buffer, + DIDerivedType DTy); + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, + DICompositeType CTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. 
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(DIEnumerator ETy); + + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(DIDerivedType DT); + +private: + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index bad87c1..8845bfa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -52,13 +53,9 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::desc("Disable debug info printing")); static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absense of debug location information explicit."), + cl::desc("Make an absence of debug location information explicit."), cl::init(false)); -#ifndef NDEBUG -STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number"); -#endif - namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -72,189 +69,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate -/// with a source file. -class CompileUnit { - /// ID - File identifier for source. - /// - unsigned ID; - - /// Die - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton - /// variables to debug information entries. - DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - - /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE*> GlobalTypes; - -public: - CompileUnit(unsigned I, DIE *D) - : ID(I), CUDie(D), IndexTyDie(0) {} - - // Accessors. - unsigned getID() const { return ID; } - DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - - /// addGlobalType - Add a new global type to the compile unit. 
- /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; - } - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } - - /// getDIEEntry - Returns the debug information entry for the speciefied - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } - -}; - -//===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// -class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. -public: - // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} - - // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } - bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); - } - bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); - } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } - DIType getType() const { - DIType Ty = Var.getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Var.isBlockByrefVariable()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 
'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); - } - return Ty; +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. 
*/ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); } return Ty; } -}; + return Ty; +} //===----------------------------------------------------------------------===// /// DbgRange - This is used to track range of instructions with identical @@ -392,19 +256,16 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; - DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; + DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); } } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -439,858 +300,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. -DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - -/// addUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addSInt - Add an signed integer attribute data and value. -/// -void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); -} - -/// addLabel - Add a Dwarf label attribute data and value. -/// -void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); -} - -/// addDelta - Add a label delta attribute data and value. -/// -void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); -} - -/// addDIEEntry - Add a DIE attribute data and value. -/// -void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, - DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); -} - - -/// addBlock - Add block data. 
-/// -void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, - DIEBlock *Block) { - Block->ComputeSize(Asm); - DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.Verify()) - return; - - unsigned Line = V.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.Verify()) - return; - - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename(), - G.getContext().getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.Verify()) - return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; - - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.Verify()) - return; - - unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = GetOrCreateSourceID(FN, NS.getDirectory()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based -/// on provided frame index. 
-void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { - MachineLocation Location; - unsigned FrameReg; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (DV->variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. -/// -void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - - // Decode the original location, and use that as the start of the byref - // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); - - if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. - However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. 
This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... <various fields> - struct __Block_byref_x_VarName *forwarding; - ... <various other fields> - SomeType VarName; - ... <maybe more fields> - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. - Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst <forward_fld_offset> - DW_OP_deref - DW_OP_plus_uconst <varName_fld_offset> - - That is what this function does. */ - -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// -void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); - bool isPointer = false; - - StringRef varName = DV->getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); - isPointer = true; - } - - DICompositeType blockStruct = DICompositeType(TmpTy); - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); - - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - StringRef fieldName = DT.getName(); - if (fieldName == "__forwarding") - forwardingField = Element; - else if (fieldName == varName) - varField = Element; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; - - // Decode the original location, and use that as the start of the byref - // variable's location. 
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (RI->getFrameRegister(*Asm->MF) == Location.getReg() - && Location.getOffset()) { - // If variable offset is based in frame register then use fbreg. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - addBlock(Die, Attribute, 0, Block); - return; - } - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - addBlock(Die, Attribute, 0, Block); -} - -/// addRegisterAddress - Add register location entry in variable DIE. 
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { - assert (MO.isReg() && "Invalid machine operand!"); - if (!MO.getReg()) - return false; - MachineLocation Location; - Location.set(MO.getReg()); - addAddress(Die, dwarf::DW_AT_location, Location); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = MO.getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, - bool Unsigned) { - if (CI->getBitWidth() <= 64) { - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - return true; - } - - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); - - int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addTemplateParams - Add template parameters in buffer. -void DwarfDebug::addTemplateParams(DIE &Buffer, DIArray TParams) { - // Add template parameters. - for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); - else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); - } - -} -/// addToContextOwner - Add Die into the list of its context owner's children. 
-void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) - ContextDIE->addChild(Die); - else - getCompileUnit(Context)->addDie(Die); -} - -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. -DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - CompileUnit *TypeCU = getCompileUnit(Ty); - DIE *TyDIE = TypeCU->getDIE(Ty); - if (TyDIE) - return TyDIE; - - // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - TypeCU->insertDIE(Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); - } - - addToContextOwner(TyDIE, Ty.getContext()); - return TyDIE; -} - -/// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.Verify()) - return; - - // Check for pre-existence. - CompileUnit *TypeCU = getCompileUnit(Ty); - DIEEntry *Entry = TypeCU->getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(Buffer); - TypeCU->insertDIEEntry(Ty, Entry); - - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); -} - -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { - // Get core information. - StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); -} - -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { - // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); - - // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - - // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); -} - -/// constructTypeDIE - Construct type DIE from DICompositeType. 
-void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. - StringRef Name = CTy.getName(); - - uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); - - switch (Tag) { - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); - break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - } - break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. - DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); - - bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); - isPrototyped = false; - } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); - } - } - // Add prototype flag. - if (isPrototyped) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - } - break; - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { - // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. 
- for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = createSubprogramDIE(DISubprogram(Element)); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); - addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else - continue; - Buffer.addChild(ElemDie); - } - - if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } - - if (Tag == dwarf::DW_TAG_class_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); - - break; - } - default: - break; - } - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { - // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } - - // Add source line info if available. - if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); - } -} - -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - CompileUnit *TypeCU = getCompileUnit(TP); - DIE *ParamDIE = TypeCU->getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. 
-DIE * -DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { - CompileUnit *TVCU = getCompileUnit(TPV); - DIE *ParamDIE = TVCU->getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; -} - -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - - Buffer.addChild(DW_Subrange); -} - -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); - - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); - - // Get an anonymous type for index type. - CompileUnit *TheCU = getCompileUnit(*CTy); - DIE *IdxTy = TheCU->getIndexTyDie(); - if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - TheCU->addDie(IdxTy); - TheCU->setIndexTyDie(IdxTy); - } - - // Add subranges to array type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); - } -} - -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; -} - /// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. @@ -1301,84 +310,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createMemberDIE - Create new member DIE. 
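constructSubrangeDIE above always writes DW_AT_upper_bound but emits DW_AT_lower_bound only when it is nonzero, zero being the default lower bound for C/C++ arrays; the array routine pairs each subrange with an anonymous 4-byte signed index type shared per compile unit. The bound decision, as a compact illustrative model:

#include <cstdint>
#include <optional>

struct Subrange {
  std::optional<int64_t> LowerBound; // omitted when 0, the C/C++ default
  int64_t UpperBound;
};

// For int a[8] the range is [0, 7], so only the upper bound 7 is written;
// a Fortran-style [1, 8] range would carry both attributes.
Subrange makeSubrange(int64_t Lo, int64_t Hi) {
  Subrange SR;
  if (Lo)
    SR.LowerBound = Lo;
  SR.UpperBound = Hi;
  return SR;
}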
-DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); - StringRef Name = DT.getName(); - if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - addType(MemberDie, DT.getTypeDerivedFrom()); - - addSourceLine(MemberDie, DT); - - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { - - // For C++, virtual base classes are not at fixed offset. Use following - // expression to extract appropriate offset from vtable. - // BaseAddr = ObAddr + *((*ObAddr) - Offset) - - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); - - if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, - dwarf::DW_VIRTUALITY_virtual); - return MemberDie; -} - /// createSubprogramDIE - Create new DIE using SP. DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); @@ -1387,19 +318,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - // Constructors and operators for anonymous aggregates do not have names. 
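The bitfield branch of the removed createMemberDIE is the densest part of this hunk: it locates the storage unit holding the field (HiMark/FieldOffset), derives the bit offset within that unit, and on little-endian targets flips the offset because DW_AT_bit_offset counts from the most significant end. The same arithmetic as a standalone sketch:

#include <cstdint>
#include <cstdio>

struct BitFieldLoc {
  uint64_t ByteOffset; // DW_AT_data_member_location (bytes)
  uint64_t BitOffset;  // DW_AT_bit_offset
};

// Same arithmetic as the bitfield branch above. Offset/Size are the field's
// declared bit offset and bit size; FieldSize/Align describe the underlying
// storage unit (e.g. 32 bits for an int-based bitfield).
BitFieldLoc locateBitField(uint64_t Offset, uint64_t Size,
                           uint64_t FieldSize, uint64_t Align,
                           bool LittleEndian) {
  uint64_t AlignMask = ~(Align - 1);
  uint64_t HiMark = (Offset + FieldSize) & AlignMask;
  uint64_t FieldOffset = HiMark - FieldSize; // start of the storage unit
  Offset -= FieldOffset;                     // bit offset within the unit
  if (LittleEndian)                          // DWARF counts from the MSB end
    Offset = FieldSize - (Offset + Size);
  return {FieldOffset >> 3, Offset};
}

int main() {
  // struct { unsigned a : 3; unsigned b : 5; }; locate 'b' (bits 3..7).
  BitFieldLoc L = locateBitField(3, 5, 32, 32, /*LittleEndian=*/true);
  std::printf("byte=%llu bit_offset=%llu\n",
              (unsigned long long)L.ByteOffset,
              (unsigned long long)L.BitOffset); // byte=0 bit_offset=24
}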
- if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + SPCU->addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. + SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); - addSourceLine(SPDie, SP); + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); + + SPCU->addSourceLine(SPDie, SP); if (SP.isPrototyped()) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1407,24 +354,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { unsigned SPTag = SPTy.getTag(); if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); + SPCU->addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0))); + SPCU->addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = SPCU->getDIEBlock(); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. 
DICompositeType SPTy = SP.getType(); @@ -1435,35 +382,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); + SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); if (unsigned isa = Asm->getISAEncoding()) { - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - // Add function template parameters. - addTemplateParams(*SPDie, SP.getTemplateParams()); - - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); - return SPDie; } @@ -1518,51 +456,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); - if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Refer function declaration directly. + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + createSubprogramDIE(SPDecl)); + else { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. 
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } } - // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); } - addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); return SPDie; } @@ -1579,13 +523,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { if (Ranges.empty()) return 0; + CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. 
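The guard carried over above decides when a definition is split into a declaration DIE plus a DW_AT_specification stub: only for functions whose lexical context is a type or namespace, since (per the hunk's own comment) gdb prefers top-level definitions and does not expect a specification DIE inside a parent function. Boiled down to a predicate:

// Condition from the hunk, reduced to its inputs. The Ctx flags describe
// the lexical parent of the subprogram.
bool needsSpecificationDIE(bool IsDefinition, bool CtxIsCompileUnit,
                           bool CtxIsFile, bool CtxIsSubprogram) {
  return IsDefinition && !CtxIsCompileUnit && !CtxIsFile && !CtxIsSubprogram;
}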
- addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1604,8 +549,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1639,17 +584,19 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { if (!Scope->getScopeNode()) return NULL; DIScope DS(Scope->getScopeNode()); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - DISubprogram InlinedSP = getDISubprogram(DS); CompileUnit *TheCU = getCompileUnit(InlinedSP); DIE *OriginDIE = TheCU->getDIE(InlinedSP); - assert(OriginDIE && "Unable to find Origin DIE!"); - addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + if (!OriginDIE) { + DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + return NULL; + } + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1665,8 +612,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); - addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } @@ -1695,7 +642,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. 
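Because .debug_ranges has not been laid out when the scope DIE is built, the code above stores the scope's future section offset as entries-so-far times the target pointer size, then queues one begin/end symbol pair per range. A toy layout model; the two-null end-of-list terminator is an assumption based on the DWARF ranges format, since that part of the routine is elided here:

#include <cstdint>
#include <utility>
#include <vector>

// Each begin/end symbol occupies one pointer-sized slot in .debug_ranges,
// so a scope's DW_AT_ranges value is simply slots-so-far * pointer size.
uint64_t appendRanges(std::vector<const void *> &Slots,
                      const std::vector<std::pair<const void *,
                                                  const void *>> &Ranges,
                      unsigned PtrSize) {
  uint64_t SectionOffset = Slots.size() * PtrSize;
  for (const auto &R : Ranges) {
    Slots.push_back(R.first);
    Slots.push_back(R.second);
  }
  Slots.push_back(nullptr); // end-of-list marker pair (assumed, per the
  Slots.push_back(nullptr); // DWARF .debug_ranges format)
  return SectionOffset;
}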
DIE *VariableDie = new DIE(Tag); - + CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); DIE *AbsDIE = NULL; DenseMap<const DbgVariable *, const DbgVariable *>::iterator V2AVI = VarToAbstractVarMap.find(DV); @@ -1703,20 +650,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { AbsDIE = V2AVI->second->getDIE(); if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, DV->getVariable()); + VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + Name); + VariableCU->addSourceLine(VariableDie, DV->getVariable()); // Add variable type. - addType(VariableDie, DV->getType()); + VariableCU->addType(VariableDie, DV->getType()); } if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); else if (DIVariable(DV->getVariable()).isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1727,7 +677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); UseDotDebugLocEntry.insert(VariableDie); @@ -1747,22 +697,31 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (DVInsn->getOperand(1).isImm() && TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); - updated = true; - } else - updated = addRegisterAddress(VariableDie, RegOp); + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + VariableCU->addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); + updated = true; } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); + updated = + VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0), + DV->getType()); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); - updated = true; - } + VariableCU->addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; } if (!updated) { // If variableDie is not updated then DBG_VALUE instruction does not @@ -1776,35 +735,20 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) 
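The rewritten DBG_VALUE handling above routes each instruction to one of three encodings: a frame-relative MachineLocation when the register operand names the frame register (the immediate is then resolved through getFrameIndexReference), a plain register location for any other register, and a constant entry for immediate operands. The triage, reduced to a toy classifier (hypothetical types, not the MachineOperand API):

enum class VarLocKind { FrameRelative, Register, ConstantInt, ConstantFP };

// Operand triage from the hunk: (reg, imm) pairs naming the frame register
// become frame-relative locations, other registers are used directly, and
// pure immediates become constant entries.
VarLocKind classifyDbgValue(bool Op0IsReg, unsigned Reg, unsigned FrameReg,
                            bool Op1IsImm, bool Op0IsImm) {
  if (Op0IsReg && Op1IsImm && Reg == FrameReg)
    return VarLocKind::FrameRelative;
  if (Op0IsReg)
    return VarLocKind::Register;
  return Op0IsImm ? VarLocKind::ConstantInt : VarLocKind::ConstantFP;
}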
{ // .. else use frame index, if available. int FI = 0; - if (findVariableFrameIndex(DV, &FI)) - addVariableAddress(DV, VariableDie, FI); - + if (findVariableFrameIndex(DV, &FI)) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + } + DV->setDIE(VariableDie); return VariableDie; } -void DwarfDebug::addPubTypes(DISubprogram SP) { - DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); - if (SPTag != dwarf::DW_TAG_subroutine_type) - return; - - DIArray Args = SPTy.getTypeArray(); - for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { - DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) - continue; - DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() - && !CATy.isForwardDecl()) { - CompileUnit *TheCU = getCompileUnit(CATy); - if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) - TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); - } - } -} - /// constructScopeDIE - Construct a DIE for this scope. DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope || !Scope->getScopeNode()) @@ -1858,7 +802,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS)); + getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1875,11 +819,9 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, return GetOrCreateSourceID("<stdin>", StringRef()); // MCStream expects full path name as filename. - if (!DirName.empty() && !FileName.startswith("/")) { - std::string FullPathName(DirName.data()); - if (!DirName.endswith("/")) - FullPathName += "/"; - FullPathName += FileName.data(); + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); } @@ -1897,21 +839,6 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, return SrcId; } -/// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - CompileUnit *TheCU = getCompileUnit(NS); - DIE *NDie = TheCU->getDIE(NS); - if (NDie) - return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - TheCU->insertDIE(NS, NDie); - if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); - addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); - return NDie; -} - /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. 
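GetOrCreateSourceID above swaps hand-rolled slash splicing for sys::path::is_absolute and sys::path::append, which also copes with directory names that already end in a separator. The same shape with std::filesystem, as an illustration rather than the LLVM API (behaviour matches for POSIX-style paths):

#include <filesystem>
#include <string>

// Join DirName and FileName the way the hunk does: prepend the directory
// only when the file name is not already absolute.
std::string fullSourcePath(const std::string &DirName,
                           const std::string &FileName) {
  namespace fs = std::filesystem;
  if (!DirName.empty() && !fs::path(FileName).is_absolute())
    return (fs::path(DirName) / FileName).string();
  return FileName;
}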
void DwarfDebug::constructCompileUnit(const MDNode *N) { @@ -1921,37 +848,37 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { unsigned ID = GetOrCreateSourceID(FN, Dir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + DIUnit.getProducer()); + NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList()) - addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("section_line")); + if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("section_line")); else - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); - + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) - addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - CompileUnit *NewCU = new CompileUnit(ID, Die); if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); @@ -2047,38 +974,34 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { bool isGlobalVariable = GV.getGlobal() != NULL; // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // Add type. - addType(VariableDIE, GTy); - if (GTy.isCompositeType() && !GTy.getName().empty() - && !GTy.isForwardDecl()) { - DIEEntry *Entry = TheCU->getDIEEntry(GTy); - assert(Entry && "Missing global type!"); - TheCU->addGlobalType(GTy.getName(), Entry->getEntry()); - } + TheCU->addType(VariableDIE, GTy); + // Add scoping info. 
if (!GV.isLocalToUnit()) { - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // Expose as global. TheCU->addGlobal(GV.getName(), VariableDIE); } // Add line number info. - addSourceLine(VariableDIE, GV); + TheCU->addSourceLine(VariableDIE, GV); // Add to map. TheCU->insertDIE(N, VariableDIE); // Add to context owner. DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); + TheCU->addToContextOwner(VariableDIE, GVContext); // Add location. if (isGlobalVariable) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. @@ -2086,28 +1009,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); TheCU->addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } } else if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) - addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } return; @@ -2133,7 +1056,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { TheCU->insertDIE(N, SubprogramDie); // Add to context owner. - addToContextOwner(SubprogramDie, SP.getContext()); + TheCU->addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. 
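For a merged global the hunk emits the location expression DW_OP_addr <symbol>, DW_OP_constu <offset>, DW_OP_plus, i.e. "address of the merged blob plus element offset". A byte-level sketch of that expression with a standard ULEB128 encoder (opcode values from the DWARF spec; in real output the address bytes are a relocation against the symbol, a raw value stands in here):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_addr   = 0x03,
  DW_OP_constu = 0x10,
  DW_OP_plus   = 0x22
};

// Standard unsigned LEB128: 7 bits per byte, high bit marks continuation.
void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// Location expression mirroring the DW_OP_addr/constu/plus sequence above.
std::vector<uint8_t> mergedGlobalLoc(uint64_t SymAddr, uint64_t Offset,
                                     unsigned PtrSize) {
  std::vector<uint8_t> Expr;
  Expr.push_back(DW_OP_addr);
  for (unsigned i = 0; i != PtrSize; ++i) // little-endian address bytes
    Expr.push_back(uint8_t(SymAddr >> (8 * i)));
  Expr.push_back(DW_OP_constu);
  emitULEB128(Expr, Offset);
  Expr.push_back(DW_OP_plus);
  return Expr;
}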
TheCU->addGlobal(SP.getName(), SubprogramDie); @@ -2148,52 +1071,80 @@ void DwarfDebug::beginModule(Module *M) { if (DisableDebugInfoPrinting) return; - DebugInfoFinder DbgFinder; - DbgFinder.processModule(*M); - - bool HasDebugInfo = false; + // If module has named metadata anchors then use them, otherwise scan the module + // using debug info finder to collect debug info. + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + + NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv"); + NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp"); + if (!GV_Nodes && !SP_Nodes) + // If there are not any global variables or any functions then + // there is not any debug info in this module. + return; + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) + constructCompileUnit(CU_Nodes->getOperand(i)); + + if (GV_Nodes) + for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i) + constructGlobalVariableDIE(GV_Nodes->getOperand(i)); + + if (SP_Nodes) + for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i) + constructSubprogramDIE(SP_Nodes->getOperand(i)); + + } else { - // Scan all the compile-units to see if there are any marked as the main unit. - // if not, we do not generate debug info. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - if (DICompileUnit(*I).isMain()) { - HasDebugInfo = true; - break; + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); + + bool HasDebugInfo = false; + // Scan all the compile-units to see if there are any marked as the main unit. + // if not, we do not generate debug info. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + if (DICompileUnit(*I).isMain()) { + HasDebugInfo = true; + break; + } } + if (!HasDebugInfo) return; + + // Create all the compile unit DIEs. + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) + constructCompileUnit(*I); + + // Create DIEs for each global variable. + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) + constructGlobalVariableDIE(*I); + + // Create DIEs for each subprogram. + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) + constructSubprogramDIE(*I); } - - if (!HasDebugInfo) return; - + // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - + // Emit initial sections. EmitSectionLabels(); - // Create all the compile unit DIEs. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) - constructCompileUnit(*I); - - // Create DIEs for each subprogram. - for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) - constructSubprogramDIE(*I); - - // Create DIEs for each global variable. 
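beginModule now trusts the llvm.dbg.cu / llvm.dbg.gv / llvm.dbg.sp anchors when they exist and only falls back to a whole-module DebugInfoFinder scan otherwise; with a CU anchor present but no global-variable and no subprogram anchors, the module is treated as carrying no debug info at all. The dispatch, reduced to a sketch over toy anchor lists (the real code walks NamedMDNode operands):

#include <vector>

struct Node {};
using AnchorList = const std::vector<Node *> *; // null when absent

enum class ModuleScan { FallBackToFinder, NoDebugInfo, BuiltFromAnchors };

// Mirrors the hunk's shape. On BuiltFromAnchors the real code runs
// constructCompileUnit over CUs, then constructGlobalVariableDIE over GVs,
// then constructSubprogramDIE over SPs, in that order.
ModuleScan buildFromAnchors(AnchorList CUs, AnchorList GVs, AnchorList SPs) {
  if (!CUs)
    return ModuleScan::FallBackToFinder; // scan with DebugInfoFinder
  if (!GVs && !SPs)
    return ModuleScan::NoDebugInfo; // nothing to describe in this module
  return ModuleScan::BuiltFromAnchors;
}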
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) - constructGlobalVariableDIE(*I); - //getOrCreateTypeDIE if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); @@ -2244,7 +1195,7 @@ void DwarfDebug::endModule() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), @@ -2254,7 +1205,8 @@ void DwarfDebug::endModule() { if (!N) continue; DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, + dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. @@ -2269,14 +1221,6 @@ void DwarfDebug::endModule() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i)); } - // Emit common frame information. - emitCommonDebugFrame(); - - // Emit function debug frame information - for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(), - E = DebugFrames.end(); I != E; ++I) - emitFunctionDebugFrame(*I); - // Compute DIE offsets and sizes. computeSizeAndOffsets(); @@ -2464,15 +1408,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, HI = History.begin(), HE = History.end(); HI != HE; ++HI) { const MachineInstr *Begin = *HI; assert(Begin->isDebugValue() && "Invalid History entry"); - MachineLocation MLoc; - if (Begin->getNumOperands() == 3) { - if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) - MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); - } else - MLoc = Asm->getDebugValueLocation(Begin); - // FIXME: emitDebugLoc only understands registers. - if (!MLoc.getReg()) + // Check if DBG_VALUE is truncating a range. + if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() + && !Begin->getOperand(0).getReg()) continue; // Compute the range for a register location. @@ -2481,7 +1420,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (HI + 1 == HE) // If Begin is the last instruction in History then its value is valid - // until the end of the funtion. + // until the end of the function. SLabel = FunctionEndSym; else { const MachineInstr *End = HI[1]; @@ -2496,7 +1435,25 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. 
- DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); + MachineLocation MLoc; + if (Begin->getNumOperands() == 3) { + if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) { + MLoc.set(Begin->getOperand(0).getReg(), + Begin->getOperand(1).getImm()); + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); + } + // FIXME: Handle isFPImm also. + else if (Begin->getOperand(0).isImm()) { + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, + Begin->getOperand(0).getImm())); + } + } else { + MLoc = Asm->getDebugValueLocation(Begin); + DotDebugLocEntries. + push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); + } } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -2533,12 +1490,17 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { + unsigned Flags = DWARF2_FLAG_IS_STMT; PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + } if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); - recordSourceLine(DL.getLine(), DL.getCol(), Scope); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else - recordSourceLine(0, 0, 0); + recordSourceLine(0, 0, 0, 0); } } @@ -2850,41 +1812,22 @@ void DwarfDebug::identifyScopeMarkers() { } } -/// FindFirstDebugLoc - Find the first debug location in the function. This -/// is intended to be an approximation for the source position of the -/// beginning of the function. -static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end(); - MBBI != MBBE; ++MBBI) { - DebugLoc DL = MBBI->getDebugLoc(); - if (!DL.isUnknown()) - return DL; - } - return DebugLoc(); +/// getScopeNode - Get MDNode for DebugLoc's scope. +static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { + if (MDNode *InlinedAt = DL.getInlinedAt(Ctx)) + return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx); + return DL.getScope(Ctx); } -#ifndef NDEBUG -/// CheckLineNumbers - Count basicblocks whose instructions do not have any -/// line number information. -static void CheckLineNumbers(const MachineFunction *MF) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - bool FoundLineNo = false; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - if (!MI->getDebugLoc().isUnknown()) { - FoundLineNo = true; - break; - } - } - if (!FoundLineNo && I->size()) - ++BlocksWithoutLineNo; - } +/// getFnDebugLoc - Walk up the scope chain of given debug loc and find +/// line number info for the function. +static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { + const MDNode *Scope = getScopeNode(DL, Ctx); + DISubprogram SP = getDISubprogram(Scope); + if (SP.Verify()) + return DebugLoc::get(SP.getLineNumber(), 0, SP); + return DebugLoc(); } -#endif /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. 
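getScopeNode above recovers the outermost scope of a possibly-inlined location by following the inlinedAt chain to its end, and getFnDebugLoc then maps that scope's subprogram to a line for the function entry; this is what lets the prologue_end line-table row point at the function's own source line rather than an inlined callee's. The chain walk with toy types:

struct Scope; // stand-in for the MDNode scope

struct Location {
  const Scope *ScopeNode;
  const Location *InlinedAt; // non-null while the location is inlined
};

// Mirrors getScopeNode: hop through inlinedAt links until the location is
// no longer inside an inlined body, then take its scope.
const Scope *scopeNodeOf(const Location &L) {
  return L.InlinedAt ? scopeNodeOf(*L.InlinedAt) : L.ScopeNode;
}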
@@ -2892,44 +1835,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; if (!extractScopeInformation()) return; -#ifndef NDEBUG - CheckLineNumbers(MF); -#endif - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - // Emit label for the implicitly defined dbg.stoppoint at the start of the - // function. - DebugLoc FDL = FindFirstDebugLoc(MF); - if (FDL.isUnknown()) return; - - const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); - const MDNode *TheScope = 0; - - DISubprogram SP = getDISubprogram(Scope); - unsigned Line, Col; - if (SP.Verify()) { - Line = SP.getLineNumber(); - Col = 0; - TheScope = SP; - } else { - Line = FDL.getLine(); - Col = FDL.getCol(); - TheScope = Scope; - } - - recordSourceLine(Line, Col, TheScope); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet<const MDNode *, 8> ProcessedArgs; - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - /// LiveUserVar - Map physreg numbers to the MDNode they contain. std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); @@ -2995,6 +1910,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MI->isLabel()) AtBlockEntry = false; + // First known non DBG_VALUE location marks beginning of function + // body. + if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()) + PrologEndLoc = MI->getDebugLoc(); + // Check if the instruction clobbers any registers with debug vars. for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { @@ -3063,6 +1983,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevInstLoc = DebugLoc(); PrevLabel = FunctionBeginSym; + + // Record beginning of function. + if (!PrologEndLoc.isUnknown()) { + DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, + MF->getFunction()->getContext()); + recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + DWARF2_FLAG_IS_STMT); + } } /// endFunction - Gather and emit post-function debug information. @@ -3109,8 +2038,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, + dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), @@ -3119,7 +2049,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; - CurrentFnArguments.clear(); + DeleteContainerPointers(CurrentFnArguments); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); @@ -3176,7 +2106,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
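The PrologEndLoc capture added above records the first known non-DBG_VALUE source location in the function body; beginInstruction later tags the matching line-table row with DWARF2_FLAG_PROLOGUE_END so debuggers can plant breakpoints past the prologue. The scan, reduced to a sketch over a toy instruction stream:

#include <vector>

struct Instr {
  bool IsDebugValue;
  unsigned Line; // 0 when the instruction carries no source location
};

// First real instruction with a location marks the end of the prologue.
unsigned findPrologueEndLine(const std::vector<Instr> &Body) {
  for (const Instr &I : Body)
    if (!I.IsDebugValue && I.Line)
      return I.Line;
  return 0; // no line info at all: no prologue_end row is emitted
}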
-void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, + unsigned Flags) { StringRef Fn; StringRef Dir; unsigned Src = 1; @@ -3204,9 +2135,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ Src = GetOrCreateSourceID(Fn, Dir); } - - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, - 0, 0); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, + 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -3264,17 +2194,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - unsigned PrevOffset = 0; for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { // Compute size of compile unit header. - static unsigned Offset = PrevOffset + + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(I->second->getCUDie(), Offset, true); - PrevOffset = Offset; } } @@ -3296,11 +2224,6 @@ void DwarfDebug::EmitSectionLabels() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); // Dwarf sections base addresses. - if (Asm->MAI->doesDwarfRequireFrameSection()) { - DwarfFrameSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame"); - } - DwarfInfoSectionSym = EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); DwarfAbbrevSectionSym = @@ -3435,8 +2358,7 @@ void DwarfDebug::emitDebugInfo() { unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. + sizeof(int8_t); // Pointer Size (in bytes) Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); @@ -3449,12 +2371,6 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitInt8(Asm->getTargetData().getPointerSize()); emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); } } @@ -3515,91 +2431,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitCommonDebugFrame - Emit common frame info into a debug frame section. -/// -void DwarfDebug::emitCommonDebugFrame() { - if (!Asm->MAI->doesDwarfRequireFrameSection()) - return; - - int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == - TargetFrameLowering::StackGrowsDown) - stackGrowth *= -1; - - // Start the dwarf frame section. 
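Two fixes above are easy to miss: computeSizeAndOffsets previously kept its running offset in a function-local static, so the compile-unit header was only accounted for on the first call, and emitDebugInfo drops the four "gdb bug" padding bytes from the unit length. After the fix, each unit restarts its DIE offsets past its own 11-byte header (length 4 + version 2 + abbrev offset 4 + address size 1), matching DWARF's unit-relative DW_FORM_ref4 references. A sketch of the corrected layout:

#include <cstdint>
#include <vector>

// DWARF compile unit header: length(4) + version(2) + abbrev offset(4)
// + address size(1). DIE offsets are relative to the start of their unit.
constexpr unsigned CUHeaderSize = 4 + 2 + 4 + 1; // 11 bytes

// Assign each DIE its unit-relative offset; one fresh running offset per
// compile unit, which is what the hunk restores by dropping 'static'.
std::vector<std::vector<unsigned>>
layoutUnits(const std::vector<std::vector<unsigned>> &DieSizesPerCU) {
  std::vector<std::vector<unsigned>> Offsets;
  for (const auto &Sizes : DieSizesPerCU) {
    unsigned Offset = CUHeaderSize; // restarts for every unit
    Offsets.emplace_back();
    for (unsigned Size : Sizes) {
      Offsets.back().push_back(Offset);
      Offset += Size;
    }
  }
  return Offsets;
}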
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfFrameSection()); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common")); - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_frame_common_end"), - Asm->GetTempSymbol("debug_frame_common_begin"), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_begin")); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->EmitInt32((int)dwarf::DW_CIE_ID); - Asm->OutStreamer.AddComment("CIE Version"); - Asm->EmitInt8(dwarf::DW_CIE_VERSION); - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE RA Column"); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); - - std::vector<MachineMove> Moves; - TFI->getInitialFrameState(Moves); - - Asm->EmitFrameMoves(Moves, 0, false); - - Asm->EmitAlignment(2); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); -} - -/// emitFunctionDebugFrame - Emit per function frame info into a debug frame -/// section. -void DwarfDebug:: -emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { - if (!Asm->MAI->doesDwarfRequireFrameSection()) - return; - - // Start the dwarf frame section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfFrameSection()); - - Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - MCSymbol *DebugFrameBegin = - Asm->GetTempSymbol("debug_frame_begin", DebugFrameInfo.Number); - MCSymbol *DebugFrameEnd = - Asm->GetTempSymbol("debug_frame_end", DebugFrameInfo.Number); - Asm->EmitLabelDifference(DebugFrameEnd, DebugFrameBegin, 4); - - Asm->OutStreamer.EmitLabel(DebugFrameBegin); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), - DwarfFrameSectionSym); - - Asm->OutStreamer.AddComment("FDE initial location"); - MCSymbol *FuncBeginSym = - Asm->GetTempSymbol("func_begin", DebugFrameInfo.Number); - Asm->OutStreamer.EmitSymbolValue(FuncBeginSym, - Asm->getTargetData().getPointerSize(), - 0/*AddrSpace*/); - - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number), - FuncBeginSym, Asm->getTargetData().getPointerSize()); - - Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); - - Asm->EmitAlignment(2); - Asm->OutStreamer.EmitLabel(DebugFrameEnd); -} - /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames() { @@ -3760,33 +2591,63 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (int Offset = Entry.Loc.getOffset()) { - // If the value is at a certain offset from frame register then - // use DW_OP_fbreg. - unsigned OffsetSize = Offset ? 
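The deleted emitCommonDebugFrame built the .debug_frame CIE by hand: code alignment factor 1, return-address column from the target register info, and a data alignment factor equal to the pointer size, negated when the stack grows down. The patch removes this emission from DwarfDebug entirely; the factor computation itself was just:

// Data alignment factor for a .debug_frame CIE, as the removed code
// computed it: pointer size, negated on down-growing stacks (the common
// case on x86 and ARM).
int dataAlignmentFactor(unsigned PtrSize, bool StackGrowsDown) {
  int Factor = static_cast<int>(PtrSize);
  return StackGrowsDown ? -Factor : Factor;
}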
MCAsmInfo::getSLEB128Size(Offset) : 1; - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + OffsetSize); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); - Asm->EmitInt8(dwarf::DW_OP_fbreg); - Asm->OutStreamer.AddComment("Offset"); - Asm->EmitSLEB128(Offset); - } else { - if (Reg < 32) { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + DIVariable DV(Entry.Variable); + Asm->OutStreamer.AddComment("Loc expr size"); + MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); + MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); + Asm->EmitLabelDifference(end, begin, 2); + Asm->OutStreamer.EmitLabel(begin); + if (Entry.isConstant()) { + DIBasicType BTy(DV.getType()); + if (BTy.Verify() && + (BTy.getEncoding() == dwarf::DW_ATE_signed + || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { + Asm->OutStreamer.AddComment("DW_OP_consts"); + Asm->EmitInt8(dwarf::DW_OP_consts); + Asm->EmitSLEB128(Entry.getConstant()); } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + Asm->OutStreamer.AddComment("DW_OP_constu"); + Asm->EmitInt8(dwarf::DW_OP_constu); + Asm->EmitULEB128(Entry.getConstant()); } + } else if (DV.hasComplexAddress()) { + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + if (Entry.Loc.getOffset()) { + i = 2; + Asm->EmitDwarfRegOp(Entry.Loc); + Asm->OutStreamer.AddComment("DW_OP_deref"); + Asm->EmitInt8(dwarf::DW_OP_deref); + Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitSLEB128(DV.getAddrElement(1)); + } else { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(Loc); + i = 2; + } + } else { + Asm->EmitDwarfRegOp(Entry.Loc); + } + + // Emit remaining complex address elements. + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); + } + } else { + // Regular entry. 
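The removed emitDebugLoc branches spelled the register-location encodings out inline; the new code hands that to Asm->EmitDwarfRegOp and, for constants, picks DW_OP_consts with SLEB128 for signed base types and DW_OP_constu with ULEB128 otherwise. A self-contained sketch of the three register cases the old code handled, with standard LEB128 encoders (opcode values from the DWARF spec):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_reg0  = 0x50, // direct register, one byte for regs 0-31
  DW_OP_regx  = 0x90, // direct register, ULEB128 register number
  DW_OP_fbreg = 0x91  // frame-base relative, SLEB128 offset
};

void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V) B |= 0x80;
    Out.push_back(B);
  } while (V);
}

void emitSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t B = V & 0x7f;
    V >>= 7; // arithmetic shift keeps the sign
    More = !((V == 0 && !(B & 0x40)) || (V == -1 && (B & 0x40)));
    if (More) B |= 0x80;
    Out.push_back(B);
  } while (More);
}

// The three cases the removed code handled inline and EmitDwarfRegOp now
// owns: frame-relative values use DW_OP_fbreg, low register numbers fit in
// a single DW_OP_reg0+N byte, and the rest need DW_OP_regx.
std::vector<uint8_t> regLocExpr(unsigned DwarfReg, int64_t Offset) {
  std::vector<uint8_t> Expr;
  if (Offset) {
    Expr.push_back(DW_OP_fbreg);
    emitSLEB128(Expr, Offset);
  } else if (DwarfReg < 32) {
    Expr.push_back(uint8_t(DW_OP_reg0 + DwarfReg));
  } else {
    Expr.push_back(DW_OP_regx);
    emitULEB128(Expr, DwarfReg);
  }
  return Expr;
}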
+ Asm->EmitDwarfRegOp(Entry.Loc); } + Asm->OutStreamer.EmitLabel(end); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4aeefde..abda2e6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -39,21 +40,6 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; -class DIArray; -class DIEnumerator; -class DIDescriptor; -class DIVariable; -class DIGlobal; -class DIGlobalVariable; -class DISubprogram; -class DIBasicType; -class DIDerivedType; -class DIType; -class DINameSpace; -class DISubrange; -class DICompositeType; -class DITemplateTypeParameter; -class DITemplateValueParameter; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. @@ -80,10 +66,21 @@ typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; MachineLocation Loc; + const MDNode *Variable; bool Merged; - DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) - : Begin(B), End(E), Loc(L), Merged(false) {} + bool Constant; + int64_t iConstant; + DotDebugLocEntry() + : Begin(0), End(0), Variable(0), Merged(false), + Constant(false), iConstant(0) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false), + Constant(false), iConstant(0) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) + : Begin(B), End(E), Variable(0), Merged(false), + Constant(true), iConstant(i) {} + /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } @@ -94,8 +91,47 @@ typedef struct DotDebugLocEntry { Next->Begin = Begin; Merged = true; } + bool isConstant() { return Constant; } + int64_t getConstant() { return iConstant; } } DotDebugLocEntry; +//===----------------------------------------------------------------------===// +/// DbgVariable - This class is used to track local variable information. +/// +class DbgVariable { + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. +public: + // AbsVar may be NULL. + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + + // Accessors. 
+ DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -122,12 +158,6 @@ class DwarfDebug { /// id mapped to a unique id. StringMap<unsigned> SourceIdMap; - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector<DIEBlock *> DIEBlocks; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - /// StringPool - A String->Symbol mapping of strings used by indirect /// references. StringMap<std::pair<MCSymbol*, unsigned> > StringPool; @@ -198,8 +228,6 @@ class DwarfDebug { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - typedef SmallVector<DbgScope *, 2> ScopeVector; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; @@ -236,6 +264,10 @@ class DwarfDebug { DebugLoc PrevInstLoc; MCSymbol *PrevLabel; + /// PrologEndLoc - This location indicates end of function prologue and + /// beginning of function body. + DebugLoc PrologEndLoc; + struct FunctionDebugFrameInfo { unsigned Number; std::vector<MachineMove> Moves; @@ -246,157 +278,23 @@ class DwarfDebug { std::vector<FunctionDebugFrameInfo> DebugFrames; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. - MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; + MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - DIEInteger *DIEIntegerOne; - private: /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// addUInt - Add an unsigned integer attribute data and value. - /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. - /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - - /// addString - Add a string attribute data and value. 
- /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label); - - /// addDelta - Add a label delta attribute data and value. - /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MachineOperand &MO); - - /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - - /// addTemplateParams - Add template parameters in buffer. - void addTemplateParams(DIE &Buffer, DIArray TParams); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based - /// on provided frame index. - void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); - - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); - - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. 
-  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
-
-  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
-  /// for the given DITemplateValueParameter.
-  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
-
-  void addPubTypes(DISubprogram SP);
-
-  /// constructTypeDIE - Construct basic type die from DIBasicType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIBasicType BTy);
-
-  /// constructTypeDIE - Construct derived type die from DIDerivedType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIDerivedType DTy);
-
-  /// constructTypeDIE - Construct type DIE from DICompositeType.
-  void constructTypeDIE(DIE &Buffer,
-                        DICompositeType CTy);
-
-  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
-  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void constructArrayTypeDIE(DIE &Buffer,
-                             DICompositeType *CTy);
-
-  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *constructEnumTypeDIE(DIEnumerator ETy);
-
-  /// createMemberDIE - Create new member DIE.
-  DIE *createMemberDIE(DIDerivedType DT);
-
-  /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(DISubprogram SP);
-
   /// getOrCreateDbgScope - Create DbgScope for the scope.
   DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -455,14 +353,6 @@ private:
   ///
   void emitEndOfLineMatrix(unsigned SectionEnd);

-  /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
-  ///
-  void emitCommonDebugFrame();
-
-  /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
-  /// section.
-  void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
-
   /// emitDebugPubNames - Emit visible names into a debug pubnames section.
   ///
   void emitDebugPubNames();
@@ -511,11 +401,6 @@ private:
   /// inlining instance.
   void emitDebugInlineInfo();

-  /// GetOrCreateSourceID - Look up the source id with the given directory and
-  /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map.
-  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
-
   /// constructCompileUnit - Create new CompileUnit for the given
   /// metadata node with tag DW_TAG_compile_unit.
   void constructCompileUnit(const MDNode *N);
@@ -532,7 +417,8 @@ private:
   /// recordSourceLine - Register a source line with debug info. Returns the
   /// unique label that was emitted and which provides correspondence to
   /// the source line list.
-  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+                        unsigned Flags);

   /// recordVariableFrameIndex - Record a variable's index.
   void recordVariableFrameIndex(const DbgVariable *V, int Index);
@@ -611,6 +497,14 @@ public:
   /// endInstruction - Process end of an instruction.
   void endInstruction(const MachineInstr *MI);
+
+  /// GetOrCreateSourceID - Look up the source id with the given directory and
+  /// source file names. If none currently exists, create a new id and insert it
+  /// in the SourceIds map.
+  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+
+  /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 06b1de6..b5f86ab 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinter.h" #include <vector> namespace llvm { @@ -140,17 +141,20 @@ public: }; class DwarfCFIException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality + /// should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; + AsmPrinter::CFIMoveType moveTypeModule; + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -170,7 +174,7 @@ public: virtual void EndFunction(); }; -class DwarfTableException : public DwarfException { +class ARMException : public DwarfException { /// shouldEmitTable - Per-function flag to indicate if EH tables should /// be emitted. bool shouldEmitTable; @@ -182,48 +186,12 @@ class DwarfTableException : public DwarfException { /// shouldEmitTableModule - Per-module flag to indicate if EH tables /// should be emitted. bool shouldEmitTableModule; - - /// shouldEmitMovesModule - Per-module flag to indicate if frame moves - /// should be emitted. - bool shouldEmitMovesModule; - - struct FunctionEHFrameInfo { - MCSymbol *FunctionEHSym; // L_foo.eh - unsigned Number; - unsigned PersonalityIndex; - bool adjustsStack; - bool hasLandingPads; - std::vector<MachineMove> Moves; - const Function *function; - - FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, - bool hC, bool hL, - const std::vector<MachineMove> &M, - const Function *f): - FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } - }; - - std::vector<FunctionEHFrameInfo> EHFrames; - - /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index - /// uses an LSDA. If so, then we need to encode that information in the CIE's - /// augmentation. - DenseMap<unsigned, bool> UsesLSDA; - - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information - /// that is shared among many Frame Description Entries. There is at least - /// one CIE in every non-empty .debug_frame section. - void EmitCIE(const Function *Personality, unsigned Index); - - /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. - void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfTableException(AsmPrinter *A); - virtual ~DwarfTableException(); + ARMException(AsmPrinter *A); + virtual ~ARMException(); /// EndModule - Emit all exception information that should come after the /// content. 
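For orientation before the next hunk: the rewritten DwarfCFIException above replaces the old all-or-nothing shouldEmitTable flag with independent per-function decisions. A minimal self-contained C++ sketch of that decision pattern follows; the helper and its parameters are illustrative inventions, not code from this patch, though the flag computation mirrors Win64Exception::BeginFunction further down.

    // DW_EH_PE_omit as defined by the DWARF exception-handling
    // pointer-encoding constants.
    static const unsigned DW_EH_PE_omit = 0xff;

    struct EHFlags {
      bool shouldEmitPersonality; // personality (.cfi_personality) info
      bool shouldEmitLSDA;        // language-specific data (.cfi_lsda)
      bool shouldEmitMoves;       // frame-move (CFI) info
    };

    // Hypothetical helper: compute the per-function flags from what the
    // function and target actually provide.
    EHFlags computeEHFlags(bool hasLandingPads, bool hasPersonalityFn,
                           unsigned PerEncoding, unsigned LSDAEncoding,
                           bool needsMoves) {
      EHFlags F;
      F.shouldEmitMoves = needsMoves;
      // A personality is only emitted when there are landing pads and the
      // target does not omit the personality encoding.
      F.shouldEmitPersonality =
          hasLandingPads && hasPersonalityFn && PerEncoding != DW_EH_PE_omit;
      // The LSDA only matters when a personality will consume it.
      F.shouldEmitLSDA =
          F.shouldEmitPersonality && LSDAEncoding != DW_EH_PE_omit;
      return F;
    }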
@@ -237,25 +205,25 @@ public: virtual void EndFunction(); }; +class Win64Exception : public DwarfException { + /// shouldEmitPersonality - Per-function flag to indicate if personality + /// info should be emitted. + bool shouldEmitPersonality; -class ARMException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitLSDA - Per-function flag to indicate if the LSDA + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. // - ARMException(AsmPrinter *A); - virtual ~ARMException(); + Win64Exception(AsmPrinter *A); + virtual ~Win64Exception(); /// EndModule - Emit all exception information that should come after the /// content. diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp deleted file mode 100644 index 7519011..0000000 --- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp +++ /dev/null @@ -1,349 +0,0 @@ -//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing DWARF exception info into asm files. -// The implementation emits all the necessary tables "by hands". -// -//===----------------------------------------------------------------------===// - -#include "DwarfException.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" -using namespace llvm; - -DwarfTableException::DwarfTableException(AsmPrinter *A) - : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false) {} - -DwarfTableException::~DwarfTableException() {} - -/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that -/// is shared among many Frame Description Entries. There is at least one CIE -/// in every non-empty .debug_frame section. -void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) { - // Size and sign of stack growth. 
- int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == - TargetFrameLowering::StackGrowsDown) - stackGrowth *= -1; - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - MCSymbol *EHFrameSym; - if (TLOF.isFunctionEHFrameSymbolPrivate()) - EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); - else - EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + - Twine(Index)); - Asm->OutStreamer.EmitLabel(EHFrameSym); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); - - // Define base labels. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); - - // Define the eh frame length. - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), - Asm->GetTempSymbol("eh_frame_common_begin", Index), - 4); - - // EH frame header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - Asm->OutStreamer.AddComment("DW_CIE_VERSION"); - Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); - - // The personality presence indicates that language specific information will - // show up in the eh frame. Find out how we are supposed to lower the - // personality function reference: - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - - char Augmentation[6] = { 0 }; - unsigned AugmentationSize = 0; - char *APtr = Augmentation + 1; - - if (PersonalityFn) { - // There is a personality function. - *APtr++ = 'P'; - AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); - } - - if (UsesLSDA[Index]) { - // An LSDA pointer is in the FDE augmentation. - *APtr++ = 'L'; - ++AugmentationSize; - } - - if (FDEEncoding != dwarf::DW_EH_PE_absptr) { - // A non-default pointer encoding for the FDE. - *APtr++ = 'R'; - ++AugmentationSize; - } - - if (APtr != Augmentation + 1) - Augmentation[0] = 'z'; - - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); - - // Round out reader. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE Return Address Column"); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Augmentation[0]) { - Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityFn) { - Asm->EmitEncodingByte(PerEncoding, "Personality"); - Asm->OutStreamer.AddComment("Personality"); - Asm->EmitReference(PersonalityFn, PerEncoding); - } - if (UsesLSDA[Index]) - Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); - if (FDEEncoding != dwarf::DW_EH_PE_absptr) - Asm->EmitEncodingByte(FDEEncoding, "FDE"); - } - - // Indicate locations of general callee saved registers in frame. 
- std::vector<MachineMove> Moves; - TFI->getInitialFrameState(Moves); - Asm->EmitFrameMoves(Moves, 0, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it must - // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get - // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); -} - -/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. -void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && - "Should not emit 'available externally' functions at all"); - - const Function *TheFunc = EHFrameInfo.function; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - // Externally visible entry into the functions eh frame info. If the - // corresponding function is static, this should not be externally visible. - if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); - - // If corresponding function is weak definition, this should be too. - if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_WeakDefinition); - - // If corresponding function is hidden, this should be too. - if (TheFunc->hasHiddenVisibility()) - if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - HiddenAttr); - - // If there are no calls then you can't unwind. This may mean we can omit the - // EH Frame, but some environments do not handle weak absolute symbols. If - // UnwindTablesMandatory is set we cannot do this optimization; the unwind - // info is to be available for non-EH uses. - if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && - (!TheFunc->isWeakForLinker() || - !Asm->MAI->getWeakDefDirective() || - TLOF.getSupportsWeakOmittedEHFrame())) { - Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, - MCConstantExpr::Create(0, Asm->OutContext)); - // This name has no connection to the function, so it might get - // dead-stripped when the function is not, erroneously. Prohibit - // dead-stripping unconditionally. - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } else { - Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); - - // EH frame header. 
- Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", - EHFrameInfo.Number)); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_common", - EHFrameInfo.PersonalityIndex), 4); - - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); - - Asm->OutStreamer.AddComment("FDE initial location"); - Asm->EmitReference(EHFuncBeginSym, FDEEncoding); - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", - EHFrameInfo.Number), - EHFuncBeginSym, - Asm->GetSizeOfEncodedValue(FDEEncoding)); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (MMI->getPersonalities()[0] != NULL) { - unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); - - Asm->EmitULEB128(Size, "Augmentation size"); - Asm->OutStreamer.AddComment("Language Specific Data Area"); - if (EHFrameInfo.hasLandingPads) - Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), - LSDAEncoding); - else - Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); - - } else { - Asm->EmitULEB128(0, "Augmentation size"); - } - - // Indicate locations of function specific callee saved registers in frame. - Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it - // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you - // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", - EHFrameInfo.Number)); - - // If the function is marked used, this table should be also. We cannot - // make the mark unconditional in this case, since retaining the table also - // retains the function in this case, and there is code around that depends - // on unused functions (calling undefined externals) being dead-stripped to - // link correctly. Yes, there really is. - if (MMI->isUsedFunction(EHFrameInfo.function)) - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } - Asm->OutStreamer.AddBlankLine(); -} - -/// EndModule - Emit all exception information that should come after the -/// content. -void DwarfTableException::EndModule() { - if (!Asm->MAI->isExceptionHandlingDwarf()) - return; - - if (!shouldEmitMovesModule && !shouldEmitTableModule) - return; - - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - - for (unsigned I = 0, E = Personalities.size(); I < E; ++I) - EmitCIE(Personalities[I], I); - - for (std::vector<FunctionEHFrameInfo>::iterator - I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) - EmitFDE(*I); -} - -/// BeginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. -void DwarfTableException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; - - // If any landing pads survive, we need an EH table. 
- shouldEmitTable = !MMI->getLandingPads().empty(); - - // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; - - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); - - shouldEmitTableModule |= shouldEmitTable; - shouldEmitMovesModule |= shouldEmitMoves; -} - -/// EndFunction - Gather and emit post-function exception information. -/// -void DwarfTableException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Record if this personality index uses a landing pad. - bool HasLandingPad = !MMI->getLandingPads().empty(); - UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - - if (HasLandingPad) - EmitExceptionTable(); - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - MCSymbol *FunctionEHSym = - Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", - TLOF.isFunctionEHFrameSymbolPrivate()); - - // Save EH frame information - EHFrames. - push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->adjustsStack(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); -} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp new file mode 100644 index 0000000..c2ad5eb --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -0,0 +1,116 @@ +//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +Win64Exception::Win64Exception(AsmPrinter *A) + : DwarfException(A), + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) + {} + +Win64Exception::~Win64Exception() {} + +/// EndModule - Emit all exception information that should come after the +/// content. 
+void Win64Exception::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void Win64Exception::BeginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + + shouldEmitMoves = Asm->needsSEHMoves(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + + if (!shouldEmitPersonality) + return; + + MCSymbol *GCCHandlerSym = + Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); + Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void Win64Exception::EndFunction() { + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + + Asm->OutStreamer.PushSection(); + Asm->OutStreamer.EmitWin64EHHandlerData(); + Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), + 4); + EmitExceptionTable(); + Asm->OutStreamer.PopSection(); + } + Asm->OutStreamer.EmitWin64EHEndProc(); +} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 78a8743..d95f77e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -41,6 +41,7 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumHoist , "Number of times common instructions are hoisted"); static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -65,7 +66,7 @@ namespace { public: static char ID; explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {} + : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Control Flow Optimizer"; } @@ -86,12 +87,14 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } -BranchFolder::BranchFolder(bool defaultEnableTailMerge) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case 
cl::BOU_FALSE: EnableTailMerge = false; break; } + + EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the @@ -105,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); + // Remove the block. MF->erase(MBB); } @@ -168,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MachineModuleInfo *mmi) { if (!tii) return false; + TriedMerging.clear(); + TII = tii; TRI = tri; MMI = mmi; @@ -186,9 +194,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { - MadeChangeThisIteration = false; - MadeChangeThisIteration |= TailMergeBlocks(MF); - MadeChangeThisIteration |= OptimizeBranches(MF); + MadeChangeThisIteration = TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + if (EnableHoistCommonCode) + MadeChangeThisIteration |= HoistCommonCode(MF); MadeChange |= MadeChangeThisIteration; } @@ -795,14 +804,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; if (I->succ_empty()) MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); // See if we can do any tail merging on those. - if (MergePotentials.size() < TailMergeThreshold && - MergePotentials.size() >= 2) + if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). @@ -826,15 +842,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + if (I->pred_size() >= 2) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); - P != E2; ++P) { + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; // Skip blocks that loop to themselves, can't tail merge these. if (PBB == IBB) continue; @@ -887,6 +905,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. 
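As a reading aid for the TailMergeBlocks changes above: once the candidate list reaches TailMergeThreshold, every collected block is remembered in TriedMerging so later iterations skip it instead of rescanning a large function. The same bounded-worklist pattern in plain, self-contained C++ (Block and the container names are illustrative stand-ins, not LLVM types):

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Block { unsigned Hash; }; // stand-in for MachineBasicBlock

    // Collect at most Threshold candidates, skipping blocks tried before;
    // when the cap is hit, mark everything collected as tried so the next
    // pass over the same function does not revisit it.
    std::vector<Block*> collectCandidates(std::vector<Block> &Blocks,
                                          std::set<Block*> &Tried,
                                          std::size_t Threshold) {
      std::vector<Block*> Candidates;
      for (std::size_t i = 0;
           i != Blocks.size() && Candidates.size() < Threshold; ++i) {
        Block *B = &Blocks[i];
        if (Tried.count(B))
          continue;
        Candidates.push_back(B);
      }
      if (Candidates.size() == Threshold)
        for (std::size_t i = 0; i != Candidates.size(); ++i)
          Tried.insert(Candidates[i]);
      return Candidates;
    }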
@@ -910,7 +933,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
   // Make sure blocks are numbered in order
   MF.RenumberBlocks();

-  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+       I != E; ) {
     MachineBasicBlock *MBB = I++;
     MadeChange |= OptimizeBlock(MBB);
@@ -1048,9 +1072,25 @@ ReoptimizeBlock:
     // AnalyzeBranch.
     if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
         PrevBB.succ_size() == 1 &&
-        !MBB->hasAddressTaken()) {
+        !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
       DEBUG(dbgs() << "\nMerging into block: " << PrevBB
                    << "From MBB: " << *MBB);
+      // Remove redundant DBG_VALUEs first.
+      if (PrevBB.begin() != PrevBB.end()) {
+        MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+        --PrevBBIter;
+        MachineBasicBlock::iterator MBBIter = MBB->begin();
+        // Check if DBG_VALUE at the end of PrevBB is identical to the
+        // DBG_VALUE at the beginning of MBB.
+        while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+               && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+          if (!MBBIter->isIdenticalTo(PrevBBIter))
+            break;
+          MachineInstr *DuplicateDbg = MBBIter;
+          ++MBBIter; --PrevBBIter;
+          DuplicateDbg->eraseFromParent();
+        }
+      }
       PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
       PrevBB.removeSuccessor(PrevBB.succ_begin());
       assert(PrevBB.succ_empty());
@@ -1339,3 +1379,282 @@ ReoptimizeBlock:
   return MadeChange;
 }
+
+//===----------------------------------------------------------------------===//
+//  Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    MadeChange |= HoistCommonCodeInSuccs(MBB);
+  }
+
+  return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *TrueBB) {
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         E = BB->succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    if (SuccBB != TrueBB)
+      return SuccBB;
+  }
+  return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common
+/// instructions in successors to. The location is usually just before the
+/// terminator; however, if the terminator is a conditional branch and its
+/// previous instruction is the flag-setting instruction, the previous
+/// instruction is the preferred location. This function also gathers uses and
+/// defs of the instructions from the insertion point to the end of the block.
+/// The data is used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+                                                  const TargetInstrInfo *TII,
+                                                  const TargetRegisterInfo *TRI,
+                                                  SmallSet<unsigned,4> &Uses,
+                                                  SmallSet<unsigned,4> &Defs) {
+  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+  if (!TII->isUnpredicatedTerminator(Loc))
+    return MBB->end();
+
+  for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Loc->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      Uses.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.insert(*AS);
+    } else if (!MO.isDead())
+      // Don't try to hoist code in the rare case the terminator defines a
+      // register that is later used.
+      return MBB->end();
+  }
+
+  if (Uses.empty())
+    return Loc;
+  if (Loc == MBB->begin())
+    return MBB->end();
+
+  // The terminator is probably a conditional branch; try not to separate the
+  // branch from the condition-setting instruction.
+  MachineBasicBlock::iterator PI = Loc;
+  --PI;
+  while (PI != MBB->begin() && PI->isDebugValue())
+    --PI;
+
+  bool IsDef = false;
+  for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    if (!MO.isReg() || MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (Uses.count(Reg))
+      IsDef = true;
+  }
+  if (!IsDef)
+    // The condition-setting instruction is not just before the conditional
+    // branch.
+    return Loc;
+
+  // Be conservative: don't insert an instruction above anything that may have
+  // side effects. And since it is potentially bad to separate the flag-setting
+  // instruction from the conditional branch, just abort the optimization
+  // completely.
+  // Also avoid moving code above a predicated instruction, since it is hard to
+  // reason about register liveness with predication.
+  bool DontMoveAcrossStore = true;
+  if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+      TII->isPredicated(PI))
+    return MBB->end();
+
+  // Find out what registers are live. Note this routine ignores other live
+  // registers that are only used by instructions in successor blocks.
+  for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      Uses.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.insert(*AS);
+    } else {
+      if (Uses.count(Reg)) {
+        Uses.erase(Reg);
+        for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+          Uses.erase(*SR); // Use getSubRegisters to be conservative
+      }
+      Defs.insert(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Defs.insert(*AS);
+    }
+  }
+
+  return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB share a common
+/// instruction sequence at their start, move those instructions before MBB's
+/// terminator if it is legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+    return false;
+
+  if (!FBB) FBB = findFalseBlock(MBB, TBB);
+  if (!FBB)
+    // Malformed bcc? True and false blocks are the same?
+    return false;
+
+  // Restrict the optimization to cases where MBB is the only predecessor;
+  // there it is an obvious win.
+  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+    return false;
+
+  // Find a suitable position to hoist the common instructions to. Also figure
+  // out which registers are used or defined by instructions from the insertion
+  // point to the end of the block.
+  SmallSet<unsigned, 4> Uses, Defs;
+  MachineBasicBlock::iterator Loc =
+    findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+  if (Loc == MBB->end())
+    return false;
+
+  bool HasDups = false;
+  SmallVector<unsigned, 4> LocalDefs;
+  SmallSet<unsigned, 4> LocalDefsSet;
+  MachineBasicBlock::iterator TIB = TBB->begin();
+  MachineBasicBlock::iterator FIB = FBB->begin();
+  MachineBasicBlock::iterator TIE = TBB->end();
+  MachineBasicBlock::iterator FIE = FBB->end();
+  while (TIB != TIE && FIB != FIE) {
+    // Skip dbg_value instructions. These do not count.
+    if (TIB->isDebugValue()) {
+      while (TIB != TIE && TIB->isDebugValue())
+        ++TIB;
+      if (TIB == TIE)
+        break;
+    }
+    if (FIB->isDebugValue()) {
+      while (FIB != FIE && FIB->isDebugValue())
+        ++FIB;
+      if (FIB == FIE)
+        break;
+    }
+    if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+      break;
+
+    if (TII->isPredicated(TIB))
+      // Hard to reason about register liveness with predicated instruction.
+      break;
+
+    bool IsSafe = true;
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isDef()) {
+        if (Uses.count(Reg)) {
+          // Avoid clobbering a register that's used by the instruction at
+          // the point of insertion.
+          IsSafe = false;
+          break;
+        }
+
+        if (Defs.count(Reg) && !MO.isDead()) {
+          // Don't hoist the instruction if the def would be clobbered by the
+          // instruction at the point of insertion. FIXME: This is overly
+          // conservative. It should be possible to hoist the instructions
+          // in BB2 in the following example:
+          // BB1:
+          // r1, eflag = op1 r2, r3
+          // brcc eflag
+          //
+          // BB2:
+          // r1 = op2, ...
+          //    = op3, r1<kill>
+          IsSafe = false;
+          break;
+        }
+      } else if (!LocalDefsSet.count(Reg)) {
+        if (Defs.count(Reg)) {
+          // Use is defined by the instruction at the point of insertion.
+          IsSafe = false;
+          break;
+        }
+      }
+    }
+    if (!IsSafe)
+      break;
+
+    bool DontMoveAcrossStore = true;
+    if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+      break;
+
+    // Track local defs so we can update live-ins.
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isDef()) {
+        if (!MO.isDead()) {
+          LocalDefs.push_back(Reg);
+          LocalDefsSet.insert(Reg);
+          for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+            LocalDefsSet.insert(*SR);
+        }
+      } else if (MO.isKill() && LocalDefsSet.count(Reg)) {
+        LocalDefsSet.erase(Reg);
+        for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+          LocalDefsSet.erase(*SR);
+      }
+    }
+
+    HasDups = true;
+    ++TIB;
+    ++FIB;
+  }
+
+  if (!HasDups)
+    return false;
+
+  MBB->splice(Loc, TBB, TBB->begin(), TIB);
+  FBB->erase(FBB->begin(), FIB);
+
+  // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + + ++NumHoist; + return true; +} diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 15dfa7f..4ed42c0 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -10,6 +10,7 @@ #ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP #define LLVM_CODEGEN_BRANCHFOLDING_HPP +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include <vector> @@ -19,11 +20,10 @@ namespace llvm { class RegScavenger; class TargetInstrInfo; class TargetRegisterInfo; - template<typename T> class SmallVectorImpl; class BranchFolder { public: - explicit BranchFolder(bool defaultEnableTailMerge); + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, @@ -48,6 +48,7 @@ namespace llvm { }; typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; + SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; class SameTailElt { MPIterator MPIter; @@ -85,6 +86,7 @@ namespace llvm { std::vector<SameTailElt> SameTails; bool EnableTailMerge; + bool EnableHoistCommonCode; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; @@ -110,6 +112,9 @@ namespace llvm { bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool HoistCommonCode(MachineFunction &MF); + bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); }; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2ca3859..aef4ff2 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp + MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp @@ -67,6 +68,7 @@ add_llvm_library(LLVMCodeGen RegAllocGreedy.cpp RegAllocLinearScan.cpp RegAllocPBQP.cpp + RegisterClassInfo.cpp RegisterCoalescer.cpp RegisterScavenging.cpp RenderMachineFunction.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 86ab2b6..5d722ee 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, } void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -118,7 +118,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // Get loop info for mi. if (mi->getParent() != mbb) { mbb = mi->getParent(); - loop = loops_.getLoopFor(mbb); + loop = Loops.getLoopFor(mbb); loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } @@ -129,7 +129,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); // Give extra weight to what looks like a loop induction variable update. 
- if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) weight *= 3; totalWeight += weight; @@ -141,9 +141,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = hint_[hint] += weight; + float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && lis_.isAllocatable(hint)) + if (hweight > bestPhys && LIS.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) @@ -151,7 +151,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } } - hint_.clear(); + Hint.clear(); // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { @@ -165,7 +165,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { return; // Mark li as unspillable if all live ranges are tiny. - if (li.isZeroLength()) { + if (li.isZeroLength(LIS.getSlotIndexes())) { li.markNotSpillable(); return; } @@ -176,7 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // FIXME: this gets much more complicated once we support non-trivial // re-materialization. bool isLoad = false; - if (lis_.isReMaterializable(li, 0, isLoad)) { + if (LIS.isReMaterializable(li, 0, isLoad)) { if (isLoad) totalWeight *= 0.9F; else @@ -187,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); - const TargetRegisterClass *orc = mri.getRegClass(reg); - SmallPtrSet<const TargetRegisterClass*,8> rcs; - - for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), - E = mri.reg_nodbg_end(); I != E; ++I) { - // The targets don't have accurate enough regclass descriptions that we can - // handle subregs. We need something similar to - // TRI::getMatchingSuperRegClass, but returning a super class instead of a - // sub class. - if (I.getOperand().getSubReg()) { - DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); - return; - } - if (const TargetRegisterClass *rc = - I->getDesc().getRegClass(I.getOperandNo(), tri)) - rcs.insert(rc); - } + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterClass *OldRC = MRI.getRegClass(reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - // If we found no regclass constraints, just leave reg as is. - // In theory, we could inflate to the largest superclass of reg's existing - // class, but that might not be legal for the current cpu setting. - // This could happen if reg is only used by COPY instructions, so we may need - // to improve on this. - if (rcs.empty()) { + // Stop early if there is no room to grow. + if (NewRC == OldRC) return; - } - // Compute the intersection of all classes in rcs. - // This ought to be independent of iteration order, but if the target register - // classes don't form a proper algebra, it is possible to get different - // results. The solution is to make sure the intersection of any two register - // classes is also a register class or the null set. - const TargetRegisterClass *rc = 0; - for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(), - E = rcs.end(); I != E; ++I) { - rc = rc ? 
getCommonSubClass(rc, *I) : *I; - assert(rc && "Incompatible regclass constraints found"); + // Accumulate constraints from all uses. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + // TRI doesn't have accurate enough information to model this yet. + if (I.getOperand().getSubReg()) + return; + const TargetRegisterClass *OpRC = + I->getDesc().getRegClass(I.getOperandNo(), TRI); + if (OpRC) + NewRC = getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return; } - - if (rc == orc) - return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) - << " to " << rc->getName() <<".\n"); - mri.setRegClass(reg, rc); + DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) + << " to " << NewRC->getName() <<".\n"); + MRI.setRegClass(reg, NewRC); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index ecd69a0..14eb054 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -22,19 +23,22 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; -CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector<CCValAssign, 16> &locs, LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { +CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, + const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; - + + clearFirstByValReg(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. +// HandleByVal - Allocate space on the stack large enough to pass an argument +// by value. The size and alignment information of the argument is encoded in +// its parameter attribute. void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, @@ -45,10 +49,11 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; + if (MF.getFrameInfo()->getMaxAlignment() < Align) + MF.getFrameInfo()->setMaxAlignment(Align); + TM.getTargetLowering()->HandleByVal(this, Size); unsigned Offset = AllocateStack(Size, Align); - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this)); } /// MarkAllocated - Mark a register and all of its aliases as allocated. diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index e37356a..270c337 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Determine a position to move orphaned loop blocks to. 
If TopMBB is not // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid loosing that fallthrough. Otherwise append + // to the top of the loop to avoid losing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. MachineFunction::iterator InsertPt = diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index f79598d..4cac453 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -27,12 +27,12 @@ using namespace llvm; CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi) : +CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0) {} @@ -385,11 +385,9 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned LastNewReg, const TargetRegisterClass *RC) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewReg)) continue; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned NewReg = Order[i]; // Don't replace a register with itself. if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair @@ -421,7 +419,8 @@ unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex) { + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; @@ -533,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) + if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) @@ -628,14 +627,10 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as well. 
const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { - MachineInstr *DI = SU->DbgInstrList[i]; - assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() - && "Non register dbg_value attached to SUnit!"); - if (DI->getOperand(0).getReg() == AntiDepReg) - DI->getOperand(0).setReg(NewReg); - } + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0daaef2..0710780 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,7 @@ #include <map> namespace llvm { +class RegisterClassInfo; class TargetInstrInfo; class TargetRegisterInfo; @@ -35,6 +37,7 @@ class TargetRegisterInfo; MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; /// AllocatableSet - The set of allocatable registers. /// We'll be ignoring anti-dependencies on non-allocatable registers, @@ -66,7 +69,7 @@ class TargetRegisterInfo; SmallSet<unsigned, 4> KeepRegs; public: - CriticalAntiDepBreaker(MachineFunction& MFi); + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); ~CriticalAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. @@ -79,7 +82,8 @@ class TargetRegisterInfo; unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned InsertPosIndex); + unsigned InsertPosIndex, + DbgValueVector &DbgValues); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 34b1a39..22c5465 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -30,6 +30,7 @@ using namespace llvm; STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); +STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); namespace { @@ -63,7 +64,7 @@ namespace { BBSet LandingPads; bool NormalizeLandingPads(); - bool LowerUnwinds(); + bool LowerUnwindsAndResumes(); bool MoveExceptionValueCalls(); Instruction *CreateExceptionValueCall(BasicBlock *BB); @@ -251,10 +252,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } + if (!URoR) return CleanupSelectors(CatchAllSels); } SmallPtrSet<InvokeInst*, 32> URoRInvokes; @@ -480,20 +478,25 @@ bool DwarfEHPrepare::NormalizeLandingPads() { /// rethrowing any previously caught exception. 
This will crash horribly /// at runtime if there is no such exception: using unwind to throw a new /// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwinds() { - SmallVector<TerminatorInst*, 16> UnwindInsts; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (isa<UnwindInst>(TI)) - UnwindInsts.push_back(TI); +bool DwarfEHPrepare::LowerUnwindsAndResumes() { + SmallVector<Instruction*, 16> ResumeInsts; + + for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { + for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ + if (isa<UnwindInst>(bi)) + ResumeInsts.push_back(bi); + else if (CallInst *call = dyn_cast<CallInst>(bi)) + if (Function *fn = dyn_cast<Function>(call->getCalledValue())) + if (fn->getName() == "llvm.eh.resume") + ResumeInsts.push_back(bi); + } } - if (UnwindInsts.empty()) return false; + if (ResumeInsts.empty()) return false; // Find the rewind function if we didn't already. if (!RewindFunction) { - LLVMContext &Ctx = UnwindInsts[0]->getContext(); + LLVMContext &Ctx = ResumeInsts[0]->getContext(); std::vector<const Type*> Params(1, Type::getInt8PtrTy(Ctx)); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), @@ -504,24 +507,36 @@ bool DwarfEHPrepare::LowerUnwinds() { bool Changed = false; - for (SmallVectorImpl<TerminatorInst*>::iterator - I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { - TerminatorInst *TI = *I; + for (SmallVectorImpl<Instruction*>::iterator + I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { + Instruction *RI = *I; - // Replace the unwind instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent) followed by an UnreachableInst. + // Replace the resuming instruction with a call to _Unwind_Resume (or the + // appropriate target equivalent). + + llvm::Value *ExnValue; + if (isa<UnwindInst>(RI)) + ExnValue = CreateExceptionValueCall(RI->getParent()); + else + ExnValue = cast<CallInst>(RI)->getArgOperand(0); // Create the call... - CallInst *CI = CallInst::Create(RewindFunction, - CreateExceptionValueCall(TI->getParent()), - "", TI); + CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - // ...followed by an UnreachableInst. - new UnreachableInst(TI->getContext(), TI); - // Nuke the unwind instruction. - TI->eraseFromParent(); - ++NumUnwindsLowered; + // ...followed by an UnreachableInst, if it was an unwind. + // Calls to llvm.eh.resume are typically already followed by this. + if (isa<UnwindInst>(RI)) + new UnreachableInst(RI->getContext(), RI); + + if (isa<UnwindInst>(RI)) + ++NumUnwindsLowered; + else + ++NumResumesLowered; + + // Nuke the resume instruction. + RI->eraseFromParent(); + Changed = true; } @@ -657,8 +672,8 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) { // basic block where an invoke unwind edge ends). Changed |= NormalizeLandingPads(); - // Turn unwind instructions into libcalls. - Changed |= LowerUnwinds(); + // Turn unwind instructions and eh.resume calls into libcalls. + Changed |= LowerUnwindsAndResumes(); // TODO: Move eh.selector calls to landing pads and combine them. diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index e08feeb..5b63468 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -173,7 +173,7 @@ namespace llvm { unsigned Offset; // sh_offset - Offset from the file start unsigned Size; // sh_size - The section size. 
unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxillary information. + unsigned Info; // sh_info - Auxiliary information. unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section e diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 25c2e02..fa2319b 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) // Create the object code emitter object for this target. ElfCE = new ELFCodeEmitter(*this); - // Inital number of sections + // Initial number of sections NumSections = 0; } @@ -662,12 +662,16 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. + if (List->isNullValue()) return; ConstantArray *InitList = cast<ConstantArray>(List); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (InitList->getOperand(i)->isNullValue()) + continue; ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. + continue; + // Emit the function pointer. EmitGlobalConstant(CS->getOperand(1), Xtor); } diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index aed8bc9..a7aba89 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { MF = &mf; EC.clear(); - EC.grow(2 * MF->size()); + EC.grow(2 * MF->getNumBlockIDs()); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { @@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.compress(); if (ViewEdgeBundles) view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + return false; } @@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } - - diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b5ec303..ebc2fc9 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Expand Psuedo-instructions produced by ISel. These are usually to allow +// Expand Pseudo-instructions produced by ISel. These are usually to allow // the expansion to contain control flow, such as a conditional move // implemented with a conditional branch and a phi, or an atomic operation // implemented with a loop. 
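The EmitXXStructorList hunk above changes the null handling: a null entry in the structor array no longer terminates emission, it is simply skipped, and a fully null (zeroinitializer) list bails out early. A self-contained sketch of the new walk, using a simplified stand-in for the '{ i32, void ()* }' records:

#include <cstdio>
#include <vector>

struct Structor {
  int Priority;   // init priority, ignored here
  void (*Fn)();   // may be null after optimization
};

// Emit every non-null constructor; null entries no longer end the walk.
static void emitStructorList(const std::vector<Structor> &List) {
  if (List.empty())
    return;               // loosely corresponds to List->isNullValue()
  for (const Structor &S : List) {
    if (!S.Fn)
      continue;           // skip a null entry instead of stopping
    std::printf("emit ctor (priority %d)\n", S.Priority);
  }
}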
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index db53b04..8b2c981 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -146,10 +145,6 @@ namespace { : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} }; - /// Roots - Basic blocks that do not have successors. These are the starting - /// points of Graph traversal. - std::vector<MachineBasicBlock*> Roots; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; @@ -270,7 +265,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!TII) return false; // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true); + BranchFolder BF(true, false); bool BFChange = BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); @@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - // Look for root nodes, i.e. blocks without successors. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - if (I->succ_empty()) - Roots.push_back(I); - std::vector<IfcvtToken*> Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + @@ -406,11 +396,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } Tokens.clear(); - Roots.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false); + BranchFolder BF(false, false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); @@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, /// candidates. void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens) { - std::set<MachineBasicBlock*> Visited; - for (unsigned i = 0, e = Roots.size(); i != e; ++i) { - for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited), - E = idf_ext_end(Roots[i], Visited); I != E; ++I) { - MachineBasicBlock *BB = *I; - AnalyzeBlock(BB, Tokens); - } + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); } // Sort to favor more complex ifcvt scheme. 
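Dropping the Roots vector above is not just cleanup: an inverse depth-first walk seeded only from successor-less blocks never reaches blocks trapped in infinite loops, so AnalyzeBlocks silently skipped them. Iterating the block list directly visits everything. A toy illustration with plain adjacency lists (not MachineFunction):

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Block 0 -> 1 -> 2 (exit); blocks 3 <-> 4 form an infinite loop.
  std::vector<std::vector<int>> Preds = {{}, {0}, {1}, {4}, {3}};

  // Old scheme: inverse DFS from the only successor-less block (2).
  std::set<int> Visited;
  std::vector<int> Work = {2};
  while (!Work.empty()) {
    int BB = Work.back(); Work.pop_back();
    if (!Visited.insert(BB).second) continue;
    for (int P : Preds[BB]) Work.push_back(P);
  }
  std::printf("old scheme analyzed %zu of 5 blocks\n", Visited.size()); // 3

  // New scheme: simply walk the block list; nothing is missed.
  for (int BB = 0; BB != 5; ++BB)
    std::printf("analyze block %d\n", BB); // AnalyzeBlock(BB, Tokens)
  return 0;
}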
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 86f4cfc..19ae333 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -16,6 +16,7 @@ #include "Spiller.h" #include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -31,6 +32,18 @@ using namespace llvm; +STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); +STATISTIC(NumSnippets, "Number of snippets included in spills"); +STATISTIC(NumSpills, "Number of spills inserted"); +STATISTIC(NumReloads, "Number of reloads inserted"); +STATISTIC(NumFolded, "Number of folded stack accesses"); +STATISTIC(NumFoldedLoads, "Number of folded loads"); +STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads"); +STATISTIC(NumHoistLocal, "Number of locally hoisted spills"); +STATISTIC(NumHoistGlobal, "Number of globally hoisted spills"); +STATISTIC(NumRedundantSpills, "Number of redundant spills identified"); + namespace { class InlineSpiller : public Spiller { MachineFunctionPass &Pass; @@ -134,9 +147,10 @@ private: bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - MachineBasicBlock::iterator MI); + SlotIndex, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -246,10 +260,11 @@ void InlineSpiller::collectRegsToSpill() { if (!isSnippet(SnipLI)) continue; SnippetCopies.insert(MI); - if (!isRegToSpill(SnipReg)) - RegsToSpill.push_back(SnipReg); - + if (isRegToSpill(SnipReg)) + continue; + RegsToSpill.push_back(SnipReg); DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + ++NumSnippets; } } @@ -419,8 +434,10 @@ void InlineSpiller::analyzeSiblingValues() { } if (!DefMI && !VNI->isPHIDef()) DefMI = LIS.getInstructionFromIndex(VNI->def); - if (DefMI) - Edit->checkRematerializable(VNI, DefMI, TII, AA); + if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } } } } @@ -431,7 +448,7 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); - SibValueMap::const_iterator I = SibValues.find(VNI); + SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -441,6 +458,20 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) return false; + // SpillReg may have been deleted by remat and DCE. 
+ if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + // Conservatively extend the stack slot range to the range of the original // value. We may be able to do better with stack slot coloring by being more // careful here. @@ -452,19 +483,22 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { << *StackInt << '\n'); // Already spilled everywhere. - if (SVI.AllDefsAreReloads) + if (SVI.AllDefsAreReloads) { + ++NumOmitReloadSpill; return true; - + } // We are going to spill SVI.SpillVNI immediately after its def, so clear out // any later spills of the same value. - eliminateRedundantSpills(LIS.getInterval(SVI.SpillReg), SVI.SpillVNI); + eliminateRedundantSpills(SibLI, SVI.SpillVNI); MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); MachineBasicBlock::iterator MII; if (SVI.SpillVNI->isPHIDef()) MII = MBB->SkipPHIsAndLabels(MBB->begin()); else { - MII = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; ++MII; } // Insert spill without kill flag immediately after def. @@ -474,6 +508,11 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { LIS.InsertMachineInstrInMaps(MII); VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + + if (MBB == CopyMI->getParent()) + ++NumHoistLocal; + else + ++NumHoistGlobal; return true; } @@ -489,8 +528,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { LiveInterval *LI; tie(LI, VNI) = WorkList.pop_back_val(); unsigned Reg = LI->reg; - DEBUG(dbgs() << "Checking redundant spills for " << PrintReg(Reg) << ':' - << VNI->id << '@' << VNI->def << '\n'); + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); // Regs to spill are taken care of. if (isRegToSpill(Reg)) @@ -528,6 +567,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // eliminateDeadDefs won't normally remove stores, so switch opcode. MI->setDesc(TII.get(TargetOpcode::KILL)); DeadDefs.push_back(MI); + ++NumRedundantSpills; } } } while (!WorkList.empty()); @@ -623,6 +663,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, if (RM.OrigMI->getDesc().canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); + ++NumFoldedLoads; return true; } @@ -649,6 +690,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + ++NumRemats; return true; } @@ -775,14 +817,15 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); + ++NumFolded; return true; } /// insertReload - Insert a reload of NewLI.reg before MI. 
void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. @@ -792,19 +835,13 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + ++NumReloads; } /// insertSpill - Insert a spill of NewLI.reg after MI. void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - MachineBasicBlock::iterator MI) { + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - - // Get the defined value. It could be an early clobber so keep the def index. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - VNInfo *VNI = OldLI.getVNInfoAt(Idx); - assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); - Idx = VNI->def; - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. @@ -813,10 +850,12 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + ++NumSpills; } /// spillAroundUses - insert spill code around each use of Reg. void InlineSpiller::spillAroundUses(unsigned Reg) { + DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. @@ -854,9 +893,22 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { SmallVector<unsigned, 8> Ops; tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + // Check for a sibling copy. unsigned SibReg = isFullCopyOf(MI, Reg); if (SibReg && isSibling(SibReg)) { + // This may actually be a copy between snippets. + if (isRegToSpill(SibReg)) { + DEBUG(dbgs() << "Found new snippet copy: " << *MI); + SnippetCopies.insert(MI); + continue; + } if (Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { @@ -867,7 +919,6 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { } } else { // This is a reload for a sib-reg copy. Drop spills downstream. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); LiveInterval &SibLI = LIS.getInterval(SibReg); eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); // The COPY will fold to a reload below. @@ -884,7 +935,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { NewLI.markNotSpillable(); if (Reads) - insertReload(NewLI, MI); + insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -899,10 +950,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. 
if (Writes && hasLiveDef) - insertSpill(NewLI, OldLI, MI); + insertSpill(NewLI, OldLI, Idx, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } @@ -938,13 +990,15 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()); - MachineInstr *MI = RI.skipInstruction();) { - assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); - // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + VRM.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } } // Delete all spilled registers. @@ -953,6 +1007,7 @@ void InlineSpiller::spillAll() { } void InlineSpiller::spill(LiveRangeEdit &edit) { + ++NumSpilledRanges; Edit = &edit; assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) && "Trying to spill a stack slot."); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 0aff128..b1014a9 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -26,7 +26,7 @@ void InterferenceCache::init(MachineFunction *mf, TRI = tri; PhysRegEntries.assign(TRI->getNumRegs(), 0); for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(indexes); + Entries[i].clear(mf, indexes); } InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { @@ -91,10 +91,6 @@ bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, } void InterferenceCache::Entry::update(unsigned MBBNum) { - BlockInterference *BI = &Blocks[MBBNum]; - BI->Tag = Tag; - BI->First = BI->Last = SlotIndex(); - SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(MBBNum); @@ -109,23 +105,39 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { PrevPos = Start; } - // Check for first interference. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; - if (!I.valid()) - continue; - SlotIndex StartI = I.start(); - if (StartI >= Stop) - continue; - if (!BI->First.isValid() || StartI < BI->First) - BI->First = StartI; - } + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } - // No interference in block. - if (!BI->First.isValid()) - return; + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } - // Check for last interference. + // Check for last interference in block. 
for (unsigned i = 0, e = Iters.size(); i != e; ++i) { Iter &I = Iters[i]; if (!I.valid() || I.start() >= Stop) @@ -140,5 +152,4 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { if (Backup) ++I; } - PrevPos = Stop; } diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 2e613ae..6c36fa4 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -43,6 +43,9 @@ class InterferenceCache { /// change. unsigned Tag; + /// MF - The current function. + MachineFunction *MF; + /// Indexes - Mapping block numbers to SlotIndex ranges. SlotIndexes *Indexes; @@ -67,8 +70,9 @@ class InterferenceCache { public: Entry() : PhysReg(0), Tag(0), Indexes(0) {} - void clear(SlotIndexes *indexes) { + void clear(MachineFunction *mf, SlotIndexes *indexes) { PhysReg = 0; + MF = mf; Indexes = indexes; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 8c2794a..b98fbed 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -32,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/StandardPasses.h" using namespace llvm; namespace llvm { @@ -149,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), + hasMCUseCFI(), InstPrinter, MCE, TAB, ShowMCInst); @@ -291,13 +292,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Standard LLVM-Level Passes. // Basic AliasAnalysis support. - createStandardAliasAnalysisPasses(&PM); + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) PM.add(createVerifierPass()); + // Simplify ObjC ARC code. This is done late because it makes re-optimization + // difficult. + PM.add(createObjCARCContractPass()); + // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -323,8 +332,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createSjLjEHPass(getTargetLowering())); // FALLTHROUGH case ExceptionHandling::DwarfCFI: - case ExceptionHandling::DwarfTable: case ExceptionHandling::ARM: + case ExceptionHandling::Win64: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 333d15f..292928f 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -101,9 +101,13 @@ class UserValue { void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, const TargetInstrInfo &TII); + /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs + /// is live. Returns true if any changes were made. 
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, DebugLoc L, LocMap::Allocator &alloc) : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) {} @@ -215,6 +219,10 @@ public: void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, const TargetRegisterInfo *TRI); + /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is + /// live. Returns true if any changes were made. + bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); @@ -228,7 +236,7 @@ public: /// Only first one needs DebugLoc to identify variable's lexical scope /// in source file. DebugLoc findDebugLoc(); - void print(raw_ostream&, const TargetRegisterInfo*); + void print(raw_ostream&, const TargetMachine*); }; } // namespace @@ -290,9 +298,12 @@ public: /// mapVirtReg - Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx. + /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + /// splitRegister - Replace all references to OldReg with NewRegs. + void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); @@ -300,7 +311,7 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { +void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2))) OS << "!\"" << MDS->getString() << "\"\t"; if (offset) @@ -312,15 +323,17 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { else OS << I.value(); } - for (unsigned i = 0, e = locations.size(); i != e; ++i) - OS << " Loc" << i << '=' << locations[i]; + for (unsigned i = 0, e = locations.size(); i != e; ++i) { + OS << " Loc" << i << '='; + locations[i].print(OS, TM); + } OS << '\n'; } void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, TRI); + userValues[i]->print(OS, &MF->getTarget()); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -677,6 +690,143 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx); } +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +bool +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { + DEBUG({ + dbgs() << "Splitting Loc" << OldLocNo << '\t'; + print(dbgs(), 0); + }); + bool DidChange = false; + LocMap::iterator LocMapI; + LocMapI.setMap(locInts); + for (unsigned i = 0; i != NewRegs.size(); ++i) { + LiveInterval *LI = NewRegs[i]; + if (LI->empty()) + continue; + + // Don't allocate the new LocNo until it is needed. + unsigned NewLocNo = ~0u; + + // Iterate over the overlaps between locInts and LI. 
+ LocMapI.find(LI->beginIndex()); + if (!LocMapI.valid()) + continue; + LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start()); + LiveInterval::iterator LIE = LI->end(); + while (LocMapI.valid() && LII != LIE) { + // At this point, we know that LocMapI.stop() > LII->start. + LII = LI->advanceTo(LII, LocMapI.start()); + if (LII == LIE) + break; + + // Now LII->end > LocMapI.start(). Do we have an overlap? + if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + // Overlapping correct location. Allocate NewLocNo now. + if (NewLocNo == ~0u) { + MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MO.setSubReg(locations[OldLocNo].getSubReg()); + NewLocNo = getLocationNo(MO); + DidChange = true; + } + + SlotIndex LStart = LocMapI.start(); + SlotIndex LStop = LocMapI.stop(); + + // Trim LocMapI down to the LII overlap. + if (LStart < LII->start) + LocMapI.setStartUnchecked(LII->start); + if (LStop > LII->end) + LocMapI.setStopUnchecked(LII->end); + + // Change the value in the overlap. This may trigger coalescing. + LocMapI.setValue(NewLocNo); + + // Re-insert any removed OldLocNo ranges. + if (LStart < LocMapI.start()) { + LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + ++LocMapI; + assert(LocMapI.valid() && "Unexpected coalescing"); + } + if (LStop > LocMapI.stop()) { + ++LocMapI; + LocMapI.insert(LII->end, LStop, OldLocNo); + --LocMapI; + } + } + + // Advance to the next overlap. + if (LII->end < LocMapI.stop()) { + if (++LII == LIE) + break; + LocMapI.advanceTo(LII->start); + } else { + ++LocMapI; + if (!LocMapI.valid()) + break; + LII = LI->advanceTo(LII, LocMapI.start()); + } + } + } + + // Finally, remove any remaining OldLocNo intervals and OldLocNo itself. + locations.erase(locations.begin() + OldLocNo); + LocMapI.goToBegin(); + while (LocMapI.valid()) { + unsigned v = LocMapI.value(); + if (v == OldLocNo) { + DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' + << LocMapI.stop() << ")\n"); + LocMapI.erase(); + } else { + if (v > OldLocNo) + LocMapI.setValueUnchecked(v-1); + ++LocMapI; + } + } + + DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);}); + return DidChange; +} + +bool +UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + bool DidChange = false; + // Split locations referring to OldReg. Iterate backwards so splitLocation can + // safely erase unused locations. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + const MachineOperand *Loc = &locations[LocNo]; + if (!Loc->isReg() || Loc->getReg() != OldReg) + continue; + DidChange |= splitLocation(LocNo, NewRegs); + } + return DidChange; +} + +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + bool DidChange = false; + for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) + DidChange |= UV->splitRegister(OldReg, NewRegs); + + if (!DidChange) + return; + + // Map all of the new virtual registers. + UserValue *UV = lookupVirtReg(OldReg); + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i]->reg, UV); +} + +void LiveDebugVariables:: +splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); +} + void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // Iterate over locations in reverse makes it easier to handle coalescing.
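The splitLocation logic above is dense because IntervalMap coalesces aggressively; the net effect is easier to see on plain ranges. A simplified sketch, assuming a single new register and half-open integer ranges (the Seg/retag names are illustrative, not the real data structures): pieces of the old location covered by the new register's live segments are retagged, and uncovered pieces are dropped, meaning the variable's value becomes unavailable there.

#include <algorithm>
#include <vector>

struct Seg { int Start, Stop, LocNo; }; // half-open [Start, Stop)

// Retag the parts of OldLoc segments in LocMap that the new register's
// live segments (NewLive, sorted and disjoint) cover; drop the rest.
static std::vector<Seg> retag(const std::vector<Seg> &LocMap,
                              const std::vector<Seg> &NewLive,
                              int OldLoc, int NewLoc) {
  std::vector<Seg> Out;
  for (const Seg &S : LocMap) {
    if (S.LocNo != OldLoc) {
      Out.push_back(S);       // other locations are untouched
      continue;
    }
    for (const Seg &L : NewLive) {
      int Lo = std::max(S.Start, L.Start);
      int Hi = std::min(S.Stop, L.Stop);
      if (Lo < Hi)
        Out.push_back({Lo, Hi, NewLoc}); // covered piece moves to NewLoc
    }
    // Uncovered pieces of S are dropped: the value is unavailable where
    // no new register is live.
  }
  return Out;
}

For example, an old-location range [0,10) intersected with a new live segment [4,6) keeps only [4,6) under the new location number.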
@@ -690,6 +840,9 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { unsigned VirtReg = Loc.getReg(); if (VRM.isAssignedReg(VirtReg) && TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + // This can create a %noreg operand in rare cases when the sub-register + // index is no longer available. That means the user value is in a + // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { @@ -701,7 +854,6 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { } coalesceLocation(LocNo); } - DEBUG(print(dbgs(), &TRI)); } /// findInsertLocation - Find an iterator for inserting a DBG_VALUE @@ -793,6 +945,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index a6e40a1..3ce3c39 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -21,10 +21,12 @@ #ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { +class LiveInterval; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -42,6 +44,11 @@ public: /// register. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + /// splitRegister - Move any user variables in OldReg to the live ranges in + /// NewRegs where they are live. Mark the values as unavailable where no new + /// register is live. + void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. /// @param VRM Rename virtual registers according to map. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c77ae1b..9257191 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -578,13 +578,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); - // Def of a register also defines its sub-registers. - for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) - // If MI also modifies the sub-register explicitly, avoid processing it - // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->definesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(*AS), 0); } } @@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = MIIdx.getStoreIndex(); } else { DEBUG(dbgs() << " live through"); - end = baseIndex; + end = getMBBEndIdx(MBB); } } @@ -1514,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // ... // def = ... // = use - // It's better to start a new interval to avoid artifically + // It's better to start a new interval to avoid artificially // extend the new interval. 
if (MI->readsWritesVirtualRegister(li.reg) == std::make_pair(false,true)) { diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 205f28a..b67f966 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { LiveInterval::iterator RegEnd = VirtReg.end(); SegmentIter SegPos = Segments.find(RegPos->start); - for (;;) { + while (SegPos.valid()) { SegPos.insert(RegPos->start, RegPos->end, &VirtReg); if (++RegPos == RegEnd) return; SegPos.advanceTo(RegPos->start); } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); } // Remove a live virtual register's segments from this union. @@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() { return FirstInterference; CheckedFirstInterference = true; InterferenceResult &IR = FirstInterference; + IR.LiveUnionI.setMap(LiveUnion->getMap()); // Quickly skip interference check for empty sets. if (VirtReg->empty() || LiveUnion->empty()) { @@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() { // VirtReg starts first, perform double binary search. IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + IR.LiveUnionI.find(IR.VirtRegI->start); } else { // LiveUnion starts first, perform double binary search. - IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + IR.LiveUnionI.find(VirtReg->beginIndex()); if (IR.LiveUnionI.valid()) IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); else @@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { +collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { // Cache the most recent interfering vreg to bypass isSeenInterference. RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; + + // Stop collecting when the max weight is exceeded. + if (RecentInterferingVReg->weight >= MaxWeight) + return InterferingVRegs.size(); + continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 0964ecd..c83578e 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -95,6 +95,9 @@ public: // Remove a live virtual register's segments from this union. void extract(LiveInterval &VirtReg); + // Remove all inserted virtual registers. + void clear() { Segments.clear(); ++Tag; } + // Print union, using TRI to translate register names void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; @@ -226,7 +229,8 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. 
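The new MaxWeight parameter (the declaration change follows below) lets a query stop collecting as soon as it sees an interfering register at least as heavy as a given threshold; for an eviction heuristic, anything past that point cannot change the decision. A schematic reduction of that early exit, not the real Query logic:

#include <vector>

struct VReg { unsigned Id; float Weight; };

// Collect interfering registers, giving up once either cap is hit.
static unsigned collectInterfering(const std::vector<VReg> &Interference,
                                   std::vector<VReg> &Out,
                                   unsigned MaxCount, float MaxWeight) {
  for (const VReg &R : Interference) {
    Out.push_back(R);
    if (Out.size() >= MaxCount || R.Weight >= MaxWeight)
      break; // stop collecting; the result may be a partial list
  }
  return static_cast<unsigned>(Out.size());
}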
- unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, + float MaxWeight = HUGE_VALF); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b96575e..052abad 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -15,6 +15,7 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -24,6 +25,10 @@ using namespace llvm; +STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); +STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); +STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); + LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, LiveIntervals &LIS, VirtRegMap &VRM) { @@ -36,14 +41,16 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, return LI; } -void LiveRangeEdit::checkRematerializable(VNInfo *VNI, +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); - if (tii.isTriviallyReMaterializable(DefMI, aa)) - remattable_.insert(VNI); scannedRemattable_ = true; + if (!tii.isTriviallyReMaterializable(DefMI, aa)) + return false; + remattable_.insert(VNI); + return true; } void LiveRangeEdit::scanRemattable(LiveIntervals &lis, @@ -59,6 +66,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis, continue; checkRematerializable(VNI, DefMI, tii, aa); } + scannedRemattable_ = true; } bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, @@ -137,11 +145,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const Remat &RM, LiveIntervals &lis, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri) { + const TargetRegisterInfo &tri, + bool Late) { assert(RM.OrigMI && "Invalid remat"); tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); + return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getDefIndex(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { @@ -194,6 +204,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, 0); Dead.push_back(DefMI); + ++NumDCEFoldedLoads; return true; } @@ -203,6 +214,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; + MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. @@ -236,8 +248,13 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, continue; LiveInterval &LI = LIS.getInterval(Reg); - // Shrink read registers. - if (MI->readsVirtualRegister(Reg)) + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. 
+ if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) ToShrink.insert(&LI); // Remove defined value. @@ -258,6 +275,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, delegate_->LRE_WillEraseInstruction(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); + ++NumDCEDeleted; } if (ToShrink.empty()) @@ -266,7 +284,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, VRM.getRegInfo(), LIS, TII)) + if (foldAsLoad(LI, Dead, MRI, LIS, TII)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); @@ -279,6 +297,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) continue; + ++NumFracRanges; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { @@ -286,7 +305,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } - ConEQ.Distribute(&Dups[0], VRM.getRegInfo()); + ConEQ.Distribute(&Dups[0], MRI); } } diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 54a2c83..db6740c 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -18,8 +18,9 @@ #ifndef LLVM_CODEGEN_LIVERANGEEDIT_H #define LLVM_CODEGEN_LIVERANGEEDIT_H -#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/LiveInterval.h" namespace llvm { @@ -113,6 +114,10 @@ public: bool empty() const { return size() == 0; } LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } + ArrayRef<LiveInterval*> regs() const { + return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_); + } + /// FIXME: Temporary accessors until we can get rid of /// LiveIntervals::AddIntervalsForSpills SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } @@ -137,7 +142,7 @@ public: /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. - void checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, const TargetInstrInfo&, AliasAnalysis*); /// Remat - Information needed to rematerialize at a specific location. @@ -165,7 +170,8 @@ public: const Remat &RM, LiveIntervals&, const TargetInstrInfo&, - const TargetRegisterInfo&); + const TargetRegisterInfo&, + bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ccbff0a..613f0c4 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -61,7 +61,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } -/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the /// parent pointer of the MBB, the MBB numbering, and any instructions in the /// MBB to be on the right operand list for registers. 
/// @@ -93,7 +93,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); - + // Add the instruction's register operands to their corresponding // use/def lists. MachineFunction *MF = Parent->getParent(); @@ -110,7 +110,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { // Remove from the use/def lists. N->RemoveRegOperandsFromUseLists(); - + N->setParent(0); LeakDetector::addGarbageObject(N); @@ -339,32 +339,70 @@ void MachineBasicBlock::updateTerminator() { } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { - Successors.push_back(succ); - succ->addPredecessor(this); -} +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { + + // If we see non-zero value for the first time it means we actually use Weight + // list, so we fill all Weights with 0's. + if (weight != 0 && Weights.empty()) + Weights.resize(Successors.size()); + + if (weight != 0 || !Weights.empty()) + Weights.push_back(weight); + + Successors.push_back(succ); + succ->addPredecessor(this); + } void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { succ->removePredecessor(this); succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + Successors.erase(I); } -MachineBasicBlock::succ_iterator +MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) { assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + (*I)->removePredecessor(this); return Successors.erase(I); } +void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New) { + uint32_t weight = 0; + succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(SI); + weight = *WI; + } + + // Update the successor information. + removeSuccessor(SI); + addSuccessor(New, weight); +} + void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { Predecessors.push_back(pred); } void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - std::vector<MachineBasicBlock *>::iterator I = - std::find(Predecessors.begin(), Predecessors.end(), pred); + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -372,10 +410,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t weight = 0; + + + // If Weight list is empty it means we don't use it (disabled optimization). 
+ if (!fromMBB->Weights.empty()) + weight = *fromMBB->Weights.begin(); + + addSuccessor(Succ, weight); fromMBB->removeSuccessor(Succ); } } @@ -384,7 +429,7 @@ void MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); addSuccessor(Succ); @@ -402,8 +447,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - std::vector<MachineBasicBlock *>::const_iterator I = - std::find(Successors.begin(), Successors.end(), MBB); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); return I != Successors.end(); } @@ -487,6 +531,30 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + // On some targets like Mips, branches may kill virtual registers. Make sure + // that LiveVariables is properly updated after updateTerminator replaces the + // terminators. + LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>(); + + // Collect a list of virtual registers killed by the terminators. + SmallVector<unsigned, 4> KilledRegs; + if (LV) + for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + MachineInstr *MI = I; + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef()) + continue; + unsigned Reg = OI->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LV->getVarInfo(Reg).removeKill(MI)) { + KilledRegs.push_back(Reg); + DEBUG(dbgs() << "Removing terminator kill: " << *MI); + OI->setIsKill(false); + } + } + } + ReplaceUsesOfBlockWith(Succ, NMBB); updateTerminator(); @@ -504,9 +572,22 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (i->getOperand(ni+1).getMBB() == this) i->getOperand(ni+1).setMBB(NMBB); - if (LiveVariables *LV = - P->getAnalysisIfAvailable<LiveVariables>()) + // Update LiveVariables. + if (LV) { + // Restore kills of virtual registers that were killed by the terminators. + while (!KilledRegs.empty()) { + unsigned Reg = KilledRegs.pop_back_val(); + for (iterator I = end(), E = begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) + continue; + LV->getVarInfo(Reg).Kills.push_back(I); + DEBUG(dbgs() << "Restored terminator kill: " << *I); + break; + } + } + // Update relevant live-through information. LV->addNewBlock(NMBB, this, Succ); + } if (MachineDominatorTree *MDT = P->getAnalysisIfAvailable<MachineDominatorTree>()) { @@ -602,15 +683,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, } // Update the successor information. - removeSuccessor(Old); - addSuccessor(New); + replaceSuccessor(Old, New); } /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the /// CFG to be inserted. If we have proven that MBB can only branch to DestA and /// DestB, remove any other MBB successors from the CFG. DestA and DestB can be /// null. -/// +/// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. 
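The successor-weight plumbing added above keeps Weights empty until the first non-zero weight is recorded, so functions that never supply weights pay nothing; from then on the vector is maintained in lockstep with Successors. A compact standalone model of the same invariant:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

class Block {
  std::vector<Block *> Successors;
  std::vector<uint32_t> Weights; // empty, or parallel to Successors

public:
  void addSuccessor(Block *Succ, uint32_t Weight = 0) {
    if (Weight != 0 && Weights.empty())
      Weights.resize(Successors.size()); // backfill zeros lazily
    if (Weight != 0 || !Weights.empty())
      Weights.push_back(Weight);
    Successors.push_back(Succ);
  }

  void removeSuccessor(std::size_t I) {
    assert(I < Successors.size() && "Not a current successor!");
    if (!Weights.empty())
      Weights.erase(Weights.begin() + I);
    Successors.erase(Successors.begin() + I);
  }

  uint32_t weight(std::size_t I) const {
    return Weights.empty() ? 0 : Weights[I];
  }
};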
@@ -685,6 +765,23 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { return DL; } +/// getSuccWeight - Return the weight of the edge from this block to succ. +/// +uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + return *getWeightIterator(I); +} + +/// getWeightIterator - Return the weight iterator corresponding to the I +/// successor iterator. +MachineBasicBlock::weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::succ_iterator I) { + assert(Weights.size() == Successors.size() && "Async weight list!"); + size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp new file mode 100644 index 0000000..c13fa6b --- /dev/null +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -0,0 +1,113 @@ +//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis uses probability info stored in Machine Basic Blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +uint32_t +MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t Weight = Src->getSuccWeight(Dst); + if (!Weight) + return DEFAULT_WEIGHT; + return Weight; +} + +bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + MachineBasicBlock *MaxSucc = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +BranchProbability +MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream &MachineBranchProbabilityInfo:: +printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 07a7d27..f97ccf6 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!FoundCSE) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; @@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (NewMI) { Commuted = true; FoundCSE = VNT.count(NewMI); - if (NewMI != MI) + if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - else if (!FoundCSE) + Changed = true; + } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! (void)TII->commuteInstruction(MI); } @@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumPhysCSEs; if (Commuted) ++NumCommutes; + Changed = true; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d81e4a1..50750a5 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -65,7 +65,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); - Alignment = TM.getTargetLowering()->getFunctionAlignment(F); + Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
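+  // The target's minimum alignment is always honored; it is only raised to
+  // the preferred alignment when the function is not optimized for size.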
+ if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + Alignment = std::max(Alignment, + TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -300,31 +304,19 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { - if (TRI) - OS << "%" << TRI->getName(I->first); - else - OS << " %physreg" << I->first; - + OS << PrintReg(I->first, TRI); if (I->second) - OS << " in reg%" << I->second; - + OS << " in " << PrintReg(I->second, TRI); if (llvm::next(I) != E) OS << ", "; } OS << '\n'; } if (RegInfo && !RegInfo->liveout_empty()) { - OS << "Function Live Outs: "; + OS << "Function Live Outs:"; for (MachineRegisterInfo::liveout_iterator - I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){ - if (TRI) - OS << '%' << TRI->getName(*I); - else - OS << "%physreg" << *I; - - if (llvm::next(I) != E) - OS << " "; - } + I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) + OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 0d137eb..36b0b83 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -125,7 +125,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { assert(TargetRegisterInfo::isPhysicalRegister(Reg)); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); - assert(Reg && "Invalid SubReg for physical register"); + // Note that getSubReg() may return 0 if the sub-register doesn't exist. + // That won't happen in legal code. setSubReg(0); } setReg(Reg); @@ -441,6 +442,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print nontemporal info. + if (MMO.isNonTemporal()) + OS << "(nontemporal)"; + return OS; } @@ -759,19 +764,35 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); const MachineOperand &OMO = Other->getOperand(i); + if (!MO.isReg()) { + if (!MO.isIdenticalTo(OMO)) + return false; + continue; + } + // Clients may or may not want to ignore defs when testing for equality. // For example, machine CSE pass only cares about finding common // subexpressions, so it's safe to ignore virtual register defs. 
-    if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+    if (MO.isDef()) {
       if (Check == IgnoreDefs)
         continue;
-      // Check == IgnoreVRegDefs
-      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
-        if (MO.getReg() != OMO.getReg())
+      else if (Check == IgnoreVRegDefs) {
+        if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+            TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+          if (MO.getReg() != OMO.getReg())
+            return false;
+      } else {
+        if (!MO.isIdenticalTo(OMO))
           return false;
-    } else if (!MO.isIdenticalTo(OMO))
-      return false;
+        if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+          return false;
+      }
+    } else {
+      if (!MO.isIdenticalTo(OMO))
+        return false;
+      if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+        return false;
+    }
   }
   return true;
 }
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1c0f6ad..b315702 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -39,7 +39,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 using namespace llvm;
 
 STATISTIC(NumHoisted,
@@ -169,6 +168,10 @@ namespace {
     ///
     bool IsLoopInvariantInst(MachineInstr &I);
 
+    /// HasAnyPHIUse - Return true if the specified register is used by any
+    /// phi node.
+    bool HasAnyPHIUse(unsigned Reg) const;
+
     /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
     /// and a use in the current loop, return true if the target considered
     /// it 'high'.
@@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 }
 
 
-/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
     if (UseMI->isPHI())
       return true;
+    // Look past copies as well.
+    if (UseMI->isCopy()) {
+      unsigned Def = UseMI->getOperand(0).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Def) &&
+          HasAnyPHIUse(Def))
+        return true;
+    }
   }
   return false;
 }
 
-
 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
 /// and a use in the current loop, return true if the target considered
 /// it 'high'.
@@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
       return false;
   }
 
-  // If result(s) of this instruction is used by PHIs, then don't hoist it.
-  // The presence of joins makes it difficult for current register allocator
-  // implementation to perform remat.
+  // If result(s) of this instruction are used by PHIs outside of the loop,
+  // don't hoist it, because doing so will introduce an extra copy.
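+  // (PHI elimination materializes the join as copies in predecessor blocks,
+  // so the hoisted value would be copied anyway.)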
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef())
       continue;
-    if (HasPHIUses(MO.getReg(), MRI))
+    if (HasAnyPHIUse(MO.getReg()))
       return false;
   }
 
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7244d5f..08ff5bb 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -79,6 +79,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
 unsigned
 MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
   assert(RegClass && "Cannot create register without RegClass!");
+  assert(RegClass->isAllocatable() &&
+         "Virtual register RegClass must be allocatable.");
 
   // New virtual register number.
   unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 8a93a24..916dff7 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (MI->isDebugValue())
       continue;
 
-    if (PerformTrivialForwardCoalescing(MI, &MBB))
+    bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+    if (Joined) {
+      MadeChange = true;
       continue;
+    }
 
     if (SinkInstruction(MI, SawStore))
       ++NumSunk, MadeChange = true;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f95f411..471463b 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,6 +23,7 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Instructions.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
@@ -32,6 +33,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -394,7 +396,13 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
     if ((*I)->isLandingPad())
       LandingPadSuccs.insert(*I);
   }
-  if (LandingPadSuccs.size() > 1)
+
+  const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  if (LandingPadSuccs.size() > 1 &&
+      !(AsmInfo &&
+        AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+        BB && isa<SwitchInst>(BB->getTerminator())))
     report("MBB has more than one landing pad successor", MBB);
 
   // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
   if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
                           TBB, FBB, Cond)) {
-    // If the block branches directly to a landing pad successor, pretend that
-    // the landing pad is a normal block.
-    LandingPadSuccs.erase(TBB);
-    LandingPadSuccs.erase(FBB);
-
     // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
     // check whether its answers match up with reality.
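     // (AnalyzeBranch returned false above, meaning it understood the
     // terminators; the cases below check its TBB/FBB/Cond answers against
     // the recorded CFG.)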
if (!TBB && !FBB) { @@ -741,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { RC = SRC; } if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { - if (RC != DRC && !RC->hasSuperClass(DRC)) { + if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " << RC->getName() << " register\n"; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 9fd5b0e..af65f13 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include <algorithm> -#include <map> using namespace llvm; static cl::opt<bool> diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 3489db2..315aedd 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } + // This forces linking of the linear scan register allocator, + // so -regalloc=linearscan still works in clang. + if (Ctor == createLinearScanRegisterAllocator) + return createLinearScanRegisterAllocator(); + if (Ctor != createDefaultRegisterAllocator) return Ctor(); @@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { case CodeGenOpt::None: return createFastRegisterAllocator(); default: - return createLinearScanRegisterAllocator(); + return createGreedyRegisterAllocator(); } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 60c24b7..982a2a5 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,6 +22,7 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -80,6 +81,7 @@ namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; CodeGenOpt::Level OptLevel; public: @@ -135,7 +137,8 @@ namespace { public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo&, + TargetSubtarget::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); @@ -179,7 +182,8 @@ namespace { SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo &RCI, + TargetSubtarget::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), KillIndices(TRI->getNumRegs()) @@ -190,9 +194,9 @@ SchedulePostRATDList::SchedulePostRATDList( TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); AntiDepBreak = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? 
-     (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+     (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
 }
 
 SchedulePostRATDList::~SchedulePostRATDList() {
@@ -205,6 +209,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
   AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+  RegClassInfo.runOnMachineFunction(Fn);
 
   // Check for explicit enable/disable of post-ra scheduling.
   TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -230,7 +235,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 
   DEBUG(dbgs() << "PostRAScheduler\n");
 
-  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
                                  CriticalPathRCs);
 
   // Loop over all of the basic blocks
@@ -304,7 +309,7 @@ void SchedulePostRATDList::Schedule() {
   if (AntiDepBreak != NULL) {
     unsigned Broken =
       AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
-                                          InsertPosIndex);
+                                          InsertPosIndex, DbgValues);
 
     if (Broken != 0) {
       // We made changes. Update the dependency graph.
@@ -540,10 +545,16 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
 #endif
   --SuccSU->NumPredsLeft;
 
-  // Compute how many cycles it will be before this actually becomes
-  // available. This is the max of the start time of all predecessors plus
-  // their latencies.
-  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+  // Standard scheduler algorithms will recompute the depth of the successor
+  // here as follows:
+  //   SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+  //
+  // However, we lazily compute node depth instead. Note that
+  // ScheduleNodeTopDown has already updated the depth of this node, which
+  // causes all descendants to be marked dirty. Setting the successor depth
+  // explicitly here would cause depth to be recomputed for all its ancestors.
+  // If the successor is not yet ready (because of a transitively redundant
+  // edge) then this causes depth computation to be quadratic in the size of
+  // the DAG.
 
   // If all the node's predecessors are scheduled, this node is ready
   // to be scheduled. Ignore the special ExitSU node.
@@ -655,6 +666,12 @@ void SchedulePostRATDList::ListScheduleTopDown() {
       ScheduleNodeTopDown(FoundSUnit, CurCycle);
       HazardRec->EmitInstruction(FoundSUnit);
       CycleHasInsts = true;
+      if (HazardRec->atIssueLimit()) {
+        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+        HazardRec->AdvanceCycle();
+        ++CurCycle;
+        CycleHasInsts = false;
+      }
     } else {
       if (CycleHasInsts) {
         DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 92e25e1..f1f3c99 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       --BeforeI;
 
       // Restore all registers immediately before the return and any
-      // terminators that preceed it.
+      // terminators that precede it.
       if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           unsigned Reg = CSI[i].getReg();
@@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       --BeforeI;
 
       // Restore all registers immediately before the return and any
-      // terminators that preceed it.
+ // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index b655dda..7f75f65 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -26,7 +26,7 @@ and then "merge" mul and mov: sxth r3, r3 mla r4, r3, lr, r4 -It also increase the likelyhood the store may become dead. +It also increase the likelihood the store may become dead. //===---------------------------------------------------------------------===// @@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// -Stack coloring improvments: +Stack coloring improvements: 1. Do proper LiveStackAnalysis on all stack objects including those which are not spill slots. diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index f431d5a..0316421 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -39,6 +39,7 @@ #include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" +#include "RegisterClassInfo.h" namespace llvm { @@ -91,6 +92,7 @@ protected: MachineRegisterInfo *MRI; VirtRegMap *VRM; LiveIntervals *LIS; + RegisterClassInfo RegClassInfo; LiveUnionArray PhysReg2LiveUnion; // Current queries, one per physreg. They must be reinitialized each time we @@ -113,6 +115,10 @@ protected: return Queries[PhysReg]; } + // Invalidate all cached information about virtual registers - live ranges may + // have changed. + void invalidateVirtRegs() { ++UserTag; } + // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. // diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 0e218a7..1d77b29 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" #include "LiveDebugVariables.h" #include "LiveIntervalUnion.h" #include "LiveRangeEdit.h" -#include "RegAllocBase.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" @@ -85,7 +85,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase { // context MachineFunction *MF; - BitVector ReservedRegs; // analyses LiveStacks *LS; @@ -235,9 +234,14 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; - PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } } void RegAllocBase::LiveUnionArray::clear() { @@ -251,13 +255,15 @@ void RegAllocBase::LiveUnionArray::clear() { } void RegAllocBase::releaseMemory() { - PhysReg2LiveUnion.clear(); + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. 
If they are already assigned to a physical // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; @@ -273,6 +279,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << '\n'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); PhysReg2LiveUnion[PhysReg].unify(VirtReg); ++NumAssigned; } @@ -303,7 +310,7 @@ void RegAllocBase::allocatePhysRegs() { } // Invalidate all interference queries, live ranges could have changed. - ++UserTag; + invalidateVirtRegs(); // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that @@ -315,6 +322,23 @@ void RegAllocBase::allocatePhysRegs() { VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!" + "\nCannot allocate: " << *VirtReg; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + MachineInstr *MI = I.skipInstruction();) { + if (!MI->isInlineAsm()) + continue; + Msg << "\nPlease check your inline asm statement for " + "invalid constraints:\n"; + MI->print(Msg, &VRM->getMachineFunction().getTarget()); + } + report_fatal_error(Msg.str()); + } + if (AvailablePhysReg) assign(*VirtReg, AvailablePhysReg); @@ -404,29 +428,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // Add newly allocated physical registers to the MBB live in sets. void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - typedef SmallVector<MachineBasicBlock*, 8> MBBVec; - MBBVec liveInMBBs; - MachineBasicBlock &entryMBB = *MF->begin(); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + LiveIntervalUnion::SegmentIter SI; for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; - for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); - ++SI) { - - // Find the set of basic blocks which this range is live into... - liveInMBBs.clear(); - if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; - - // And add the physreg for this interval to their live-in sets. 
-      for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
-           I != E; ++I) {
-        MachineBasicBlock *MBB = *I;
-        if (MBB == &entryMBB) continue;
-        if (MBB->isLiveIn(PhysReg)) continue;
-        MBB->addLiveIn(PhysReg);
-      }
+    MachineFunction::iterator MBB = llvm::next(MF->begin());
+    MachineFunction::iterator MFE = MF->end();
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(MBB);
+    SI.setMap(LiveUnion.getMap());
+    SI.find(Start);
+    while (SI.valid()) {
+      if (SI.start() <= Start) {
+        if (!MBB->isLiveIn(PhysReg))
+          MBB->addLiveIn(PhysReg);
+      } else if (SI.start() > Stop)
+        MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+      if (++MBB == MFE)
+        break;
+      tie(Start, Stop) = Indexes->getMBBRange(MBB);
+      SI.advanceTo(Start);
+    }
   }
 }
@@ -454,14 +480,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
   SmallVector<unsigned, 8> PhysRegSpillCands;
 
   // Check for an available register in this class.
-  const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg);
-
-  for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF),
-         E = TRC->allocation_order_end(*MF);
-       I != E; ++I) {
-
+  ArrayRef<unsigned> Order =
+    RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+  for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
+       ++I) {
     unsigned PhysReg = *I;
-    if (ReservedRegs.test(PhysReg)) continue;
 
     // Check interference and as a side effect, initialize queries for this
     // VirtReg and its aliases.
@@ -490,8 +513,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
     // Tell the caller to allocate to this newly freed physical register.
     return *PhysRegI;
   }
+
   // No other spill candidates were found, so spill the current VirtReg.
   DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+  if (!VirtReg.isSpillable())
+    return ~0u;
   LiveRangeEdit LRE(VirtReg, SplitVRegs);
   spiller().spill(LRE);
 
@@ -509,9 +535,6 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
   DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
 
   RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
-
-  ReservedRegs = TRI->getReservedRegs(*MF);
-
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
 
   allocatePhysRegs();
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 15036e3..65ebdf8 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -58,6 +59,7 @@ namespace {
     MachineRegisterInfo *MRI;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
+    RegisterClassInfo RegClassInfo;
 
     // Basic block currently being allocated.
     MachineBasicBlock *MBB;
@@ -97,7 +99,7 @@ namespace {
       // immediately without checking aliases.
      regFree,
 
-      // A reserved register has been assigned expolicitly (e.g., setting up a
+      // A reserved register has been assigned explicitly (e.g., setting up a
      // call parameter), and it remains reserved until it is used.
      regReserved
 
@@ -113,9 +115,6 @@ namespace {
     // instruction, and so cannot be allocated.
     BitVector UsedInInstr;
 
-    // Allocatable - vector of allocatable physical registers.
-    BitVector Allocatable;
-
     // SkippedInstrs - Descriptors of instructions whose clobber list was
     // ignored because all registers were spilled.
It is still necessary to // mark all the clobbered registers as used by the function. @@ -396,7 +395,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -420,20 +418,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) + if (UsedInInstr.test(PhysReg)) { + DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); return spillImpossible; + } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: + DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " + << PhysReg << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } - // This is a disabled register, add up const of aliases. + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -479,30 +482,26 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !Allocatable.test(Hint))) + !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint))) Hint = 0; // Take hint when possible. if (Hint) { - switch(calcSpillCost(Hint)) { - default: - definePhysReg(MI, Hint, regFree); - // Fall through. - case 0: + // Ignore the hint if we would have to spill a dirty register. + unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); return assignVirtToPhysReg(LRE, Hint); - case spillImpossible: - break; } } - TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); + ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC); // First try to find a completely free register. - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && - Allocatable.test(PhysReg)) + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) return assignVirtToPhysReg(LRE, PhysReg); } @@ -510,10 +509,11 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { << RC->getName() << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - if (!Allocatable.test(*I)) - continue; + for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. 
if (Cost == 0) return assignVirtToPhysReg(LRE, *I); @@ -722,9 +722,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); } // Also mark PartialDefs as used to avoid reallocation. @@ -764,7 +763,7 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - if (Allocatable.test(*I)) + if (RegClassInfo.isAllocatable(*I)) definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; @@ -895,7 +894,7 @@ void RAFast::AllocateBasicBlock() { } continue; } - if (!Allocatable.test(Reg)) continue; + if (!RegClassInfo.isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -984,7 +983,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!Allocatable.test(Reg)) continue; + if (!RegClassInfo.isAllocatable(Reg)) continue; definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); continue; @@ -1040,9 +1039,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); - + RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.resize(TRI->getNumRegs()); - Allocatable = TRI->getAllocatableSet(*MF); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 889bca3..8d06325 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -62,7 +62,6 @@ class RAGreedy : public MachineFunctionPass, // context MachineFunction *MF; - BitVector ReservedRegs; // analyses SlotIndexes *Indexes; @@ -72,6 +71,7 @@ class RAGreedy : public MachineFunctionPass, MachineLoopRanges *LoopRanges; EdgeBundles *Bundles; SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; // state std::auto_ptr<Spiller> SpillerInstance; @@ -94,12 +94,13 @@ class RAGreedy : public MachineFunctionPass, RS_New, ///< Never seen before. RS_First, ///< First time in the queue. RS_Second, ///< Second time in the queue. - RS_Region, ///< Produced by region splitting. - RS_Block, ///< Produced by per-block splitting. + RS_Global, ///< Produced by global splitting. RS_Local, ///< Produced by local splitting. RS_Spill ///< Produced by spilling. }; + static const char *const StageName[]; + IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; LiveRangeStage getStage(const LiveInterval &VirtReg) const { @@ -116,6 +117,15 @@ class RAGreedy : public MachineFunctionPass, } } + // Eviction. Sometimes an assigned live range can be evicted without + // conditions, but other times it must be split after being evicted to avoid + // infinite loops. + enum CanEvict { + CE_Never, ///< Can never evict. + CE_Always, ///< Can always evict. + CE_WithSplit ///< Can evict only if range is also split or spilled. + }; + // splitting state. std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -126,14 +136,17 @@ class RAGreedy : public MachineFunctionPass, /// All basic blocks where the current register has uses. 
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
 
-  /// All basic blocks where the current register is live-through and
-  /// interference free.
-  SmallVector<unsigned, 8> TransparentBlocks;
-
   /// Global live range splitting candidate info.
   struct GlobalSplitCandidate {
     unsigned PhysReg;
     BitVector LiveBundles;
+    SmallVector<unsigned, 8> ActiveBlocks;
+
+    void reset(unsigned Reg) {
+      PhysReg = Reg;
+      LiveBundles.clear();
+      ActiveBlocks.clear();
+    }
   };
 
   /// Candidate info for each PhysReg in AllocationOrder.
@@ -141,10 +154,6 @@ class RAGreedy : public MachineFunctionPass,
   /// class.
   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
-  /// For every instruction in SA->UseSlots, store the previous non-copy
-  /// instruction.
-  SmallVector<SlotIndex, 8> PrevSlot;
-
 public:
   RAGreedy();
 
@@ -173,18 +182,21 @@ private:
   void LRE_WillShrinkVirtReg(unsigned);
   void LRE_DidCloneVirtReg(unsigned, unsigned);
 
-  bool addSplitConstraints(unsigned, float&);
-  float calcGlobalSplitCost(unsigned, const BitVector&);
-  void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+  float calcSpillCost();
+  bool addSplitConstraints(InterferenceCache::Cursor, float&);
+  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
+  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+  void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                          SmallVectorImpl<LiveInterval*>&);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
-  SlotIndex getPrevMappedIndex(const MachineInstr*);
-  void calcPrevSlots();
-  unsigned nextSplitPoint(unsigned);
+  CanEvict canEvict(LiveInterval &A, LiveInterval &B);
   bool canEvictInterference(LiveInterval&, unsigned, float&);
+  unsigned tryAssign(LiveInterval&, AllocationOrder&,
+                     SmallVectorImpl<LiveInterval*>&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&);
+                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
@@ -196,6 +208,22 @@ private:
 
 char RAGreedy::ID = 0;
 
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+  "RS_New",
+  "RS_First",
+  "RS_Second",
+  "RS_Global",
+  "RS_Local",
+  "RS_Spill"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
 FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
@@ -287,6 +315,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
 void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
   LRStage.clear();
+  GlobalCand.clear();
   RegAllocBase::releaseMemory();
 }
 
@@ -329,28 +358,85 @@ LiveInterval *RAGreedy::dequeue() {
   return LI;
 }
 
+
+//===----------------------------------------------------------------------===//
+//                            Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+                             AllocationOrder &Order,
+                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  Order.rewind();
+  unsigned PhysReg;
+  while ((PhysReg = Order.next()))
+    if (!checkPhysRegInterference(VirtReg, PhysReg))
+      break;
+  if (!PhysReg || Order.isHint(PhysReg))
+    return PhysReg;
+
+  // PhysReg is available. Try to evict interference from a cheaper alternative.
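+  // getCostPerUse is the extra per-use cost of a register relative to others
+  // in its class (e.g. x86-64 registers that need a REX prefix), so an
+  // eviction that lands VirtReg in a cheaper register can pay for itself.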
+  unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+  // Most registers have 0 additional cost.
+  if (!Cost)
+    return PhysReg;
+
+  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+               << '\n');
+  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+  return CheapReg ? CheapReg : PhysReg;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Interference eviction
 //===----------------------------------------------------------------------===//
 
+/// canEvict - determine if A can evict the assigned live range B. The eviction
+/// policy defined by this function together with the allocation order defined
+/// by enqueue() decides which registers ultimately end up being split and
+/// spilled.
+///
+/// This function must define a non-circular relation when it returns CE_Always,
+/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second
+/// range, it is possible to return CE_WithSplit which forces the evicted
+/// register to be split or spilled before it can evict anything again. That
+/// guarantees progress.
+RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
+  return A.weight > B.weight ? CE_Always : CE_Never;
+}
+
 /// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted. Set maxWeight to the maximal spill weight of an interference.
+/// be evicted.
+/// Return false if any interference is heavier than MaxWeight.
+/// On return, set MaxWeight to the maximal spill weight of an interference.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                     float &MaxWeight) {
   float Weight = 0;
   for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
-    // If there is 10 or more interferences, chances are one is smaller.
-    if (Q.collectInterferingVRegs(10) >= 10)
+    // If there are 10 or more interferences, chances are one is heavier.
+    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
       return false;
 
-    // Check if any interfering live range is heavier than VirtReg.
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *Intf = Q.interferingVRegs()[i];
+    // Check if any interfering live range is heavier than MaxWeight.
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
         return false;
-      if (Intf->weight >= VirtReg.weight)
+      if (Intf->weight >= MaxWeight)
         return false;
+      switch (canEvict(VirtReg, *Intf)) {
+      case CE_Always:
+        break;
+      case CE_Never:
+        return false;
+      case CE_WithSplit:
+        if (getStage(*Intf) > RS_Second)
+          return false;
+        break;
+      }
       Weight = std::max(Weight, Intf->weight);
     }
   }
@@ -364,21 +450,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// @return  Physreg to assign VirtReg, or 0.
 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             AllocationOrder &Order,
-                            SmallVectorImpl<LiveInterval*> &NewVRegs){
+                            SmallVectorImpl<LiveInterval*> &NewVRegs,
+                            unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
   // Keep track of the lightest single interference seen so far.
- float BestWeight = 0; + float BestWeight = HUGE_VALF; unsigned BestPhys = 0; Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a register in a function has cost 1. + if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) + continue; + + float Weight = BestWeight; if (!canEvictInterference(VirtReg, PhysReg, Weight)) continue; // This is an eviction candidate. - DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " << Weight << '\n'); if (BestPhys && Weight >= BestWeight) continue; @@ -403,6 +496,11 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, unassign(*Intf, VRM->getPhys(Intf->reg)); ++NumEvicted; NewVRegs.push_back(Intf); + // Prevent looping by forcing the evicted ranges to be split before they + // can evict anything else. + if (getStage(*Intf) < RS_Second && + canEvict(VirtReg, *Intf) == CE_WithSplit) + LRStage[Intf->reg] = RS_Second; } } return BestPhys; @@ -417,9 +515,9 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// interference pattern in Physreg and its aliases. Add the constraints to /// SpillPlacement and return the static cost of this split in Cost, assuming /// that all preferences in SplitConstraints are met. -/// If it is evident that no bundles will be live, abort early and return false. -bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { - InterferenceCache::Cursor Intf(IntfCache, PhysReg); +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. @@ -446,7 +544,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { BC.Entry = SpillPlacement::MustSpill, ++Ins; else if (Intf.first() < BI.FirstUse) BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + else if (Intf.first() < BI.LastUse) ++Ins; } @@ -456,7 +554,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { BC.Exit = SpillPlacement::MustSpill, ++Ins; else if (Intf.last() > BI.LastUse) BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + else if (Intf.last() > BI.FirstUse) ++Ins; } @@ -464,35 +562,41 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { if (Ins) StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } + Cost = StaticCost; // Add constraints for use-blocks. Note that these are the only constraints // that may add a positive bias, it is downhill from here. SpillPlacer->addConstraints(SplitConstraints); - if (SpillPlacer->getPositiveNodes() == 0) - return false; + return SpillPlacer->scanActiveBundles(); +} - Cost = StaticCost; - // Now handle the live-through blocks without uses. These can only add - // negative bias, so we can abort whenever there are no more positive nodes. - // Compute constraints for a group of 8 blocks at a time. +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. 
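+/// Interference-free blocks contribute only bundle links and are batched and
+/// passed to addLinks; blocks with interference get full constraints.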
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { const unsigned GroupSize = 8; SpillPlacement::BlockConstraint BCS[GroupSize]; - unsigned B = 0; - TransparentBlocks.clear(); + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; - assert(B < GroupSize && "Array overflow"); - BCS[B].Number = Number; + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; Intf.moveToBlock(Number); if (!Intf.hasInterference()) { - TransparentBlocks.push_back(Number); + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + T = 0; + } continue; } + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + // Interference for the live-in value. if (Intf.first() <= Indexes->getMBBStartIdx(Number)) BCS[B].Entry = SpillPlacement::MustSpill; @@ -509,30 +613,94 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) { ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); SpillPlacer->addConstraints(Array); B = 0; - // Abort early when all hope is lost. - if (SpillPlacer->getPositiveNodes() == 0) - return false; } } ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); SpillPlacer->addConstraints(Array); - if (SpillPlacer->getPositiveNodes() == 0) - return false; + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); +} - // There is still some positive bias. Add all the links. - SpillPlacer->addLinks(TransparentBlocks); - return true; +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + if (NewBundles.empty()) + break; + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif + } + } + // Any new blocks to add? + if (ActiveBlocks.size() > AddedTo) { + ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); + } + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); + } + DEBUG(dbgs() << ", v=" << Visited); } +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. 
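+/// The result seeds BestCost in tryRegionSplit below, so a region candidate
+/// is only pursued when it beats simply isolating every use block.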
+float RAGreedy::calcSpillCost() { + float Cost = 0; + const LiveInterval &LI = SA->getParent(); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(Number); + LiveInterval::const_iterator I = LI.find(Start); + assert(I != LI.end() && "Expected live-in value"); + // Is there a different live-out value? If so, we need an extra spill + // instruction. + if (I->end < Stop) + Cost += SpillPlacer->getBlockFrequency(Number); + } + } + return Cost; +} /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, - const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -549,11 +717,8 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - SplitConstraints.resize(UseBlocks.size() + ThroughBlocks.size()); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; if (!RegIn && !RegOut) @@ -578,23 +743,25 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl<LiveInterval*> &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, PhysReg); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); // Create the main cross-block interval. - SE->openIntv(); + const unsigned MainIntv = SE->openIntv(); // First add all defs that are live out of a block. 
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); @@ -603,6 +770,14 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Create separate intervals for isolated blocks with multiple uses. + if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + continue; + } + // Should the register be live out? if (!BI.LiveOut || !RegOut) continue; @@ -628,7 +803,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << ", no interference"); if (!BI.LiveThrough) { DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); continue; } if (!RegIn) { @@ -645,10 +820,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, // Block has interference. DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && Intf.last() <= BI.Def) { + if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) { // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE->useIntv(BI.Def, Stop); + DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n'); + SE->useIntv(BI.FirstUse, Stop); continue; } @@ -704,7 +879,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << ", no interference"); if (!BI.LiveThrough) { DEBUG(dbgs() << ", killed in block.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); continue; } if (!RegOut) { @@ -737,10 +912,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, // Block has interference. DEBUG(dbgs() << ", interference from " << Intf.first()); - if (!BI.LiveThrough && Intf.first() >= BI.Kill) { + if (!BI.LiveThrough && Intf.first() >= BI.LastUse) { // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE->useIntv(Start, BI.Kill); + DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n'); + SE->useIntv(Start, BI.LastUse); continue; } @@ -766,9 +941,8 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Handle live-through blocks. - ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks(); - for (unsigned i = 0; i != ThroughBlocks.size(); ++i) { - unsigned Number = ThroughBlocks[i]; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; DEBUG(dbgs() << "Live through BB#" << Number << '\n'); @@ -787,70 +961,113 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, SE->enterIntvAtEnd(*MBB); } - SE->closeIntv(); - - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. 
-  SE->finish();
   ++NumGlobalSplits;
 
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+  LRStage.resize(MRI->getNumVirtRegs());
+  unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+  // Sort out the new intervals created by splitting. We get four kinds:
+  // - Remainder intervals should not be split again.
+  // - Candidate intervals can be assigned to Cand.PhysReg.
+  // - Block-local splits are candidates for local splitting.
+  // - DCE leftovers should go back on the queue.
+  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+    unsigned Reg = LREdit.get(i)->reg;
+
+    // Ignore old intervals from DCE.
+    if (LRStage[Reg] != RS_New)
+      continue;
+
+    // Remainder interval. Don't try splitting again, spill if it doesn't
+    // allocate.
+    if (IntvMap[i] == 0) {
+      LRStage[Reg] = RS_Global;
+      continue;
+    }
+
+    // Main interval. Allow repeated splitting as long as the number of live
+    // blocks is strictly decreasing.
+    if (IntvMap[i] == MainIntv) {
+      if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+                     << " blocks as original.\n");
+        // Don't allow repeated splitting as a safeguard against looping.
+        LRStage[Reg] = RS_Global;
+      }
+      continue;
+    }
+
+    // Other intervals are treated as new. This includes local intervals created
+    // for blocks with multiple uses, and anything created by DCE.
+  }
+
   if (VerifyEnabled)
     MF->verify(this, "After splitting live range around region");
 }
 
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  BitVector LiveBundles, BestBundles;
-  float BestCost = 0;
-  unsigned BestReg = 0;
+  float BestCost = Hysteresis * calcSpillCost();
+  DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+  const unsigned NoCand = ~0u;
+  unsigned BestCand = NoCand;
 
   Order.rewind();
   for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
     if (GlobalCand.size() <= Cand)
       GlobalCand.resize(Cand+1);
-    GlobalCand[Cand].PhysReg = PhysReg;
+    GlobalCand[Cand].reset(PhysReg);
 
-    SpillPlacer->prepare(LiveBundles);
+    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
     float Cost;
-    if (!addSplitConstraints(PhysReg, Cost)) {
-      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bias\n");
+    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+    if (!addSplitConstraints(Intf, Cost)) {
+      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tbiased = "
-                 << SpillPlacer->getPositiveNodes() << ", static = " << Cost);
-    if (BestReg && Cost >= BestCost) {
-      DEBUG(dbgs() << " worse than " << PrintReg(BestReg, TRI) << '\n');
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+    if (Cost >= BestCost) {
+      DEBUG({
+        if (BestCand == NoCand)
+          dbgs() << " worse than no bundles\n";
+        else
+          dbgs() << " worse than "
+                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+      });
       continue;
     }
+    growRegion(GlobalCand[Cand], Intf);
 
     SpillPlacer->finish();
 
     // No live bundles, defer to splitSingleBlocks().
- if (!LiveBundles.any()) { + if (!GlobalCand[Cand].LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(PhysReg, LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = 0.98f * Cost; // Prevent rounding effects. - BestBundles.swap(LiveBundles); + if (Cost < BestCost) { + BestCand = Cand; + BestCost = Hysteresis * Cost; // Prevent rounding effects. } } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Region); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); return 0; } @@ -913,47 +1130,6 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, } } -/// getPrevMappedIndex - Return the slot index of the last non-copy instruction -/// before MI that has a slot index. If MI is the first mapped instruction in -/// its block, return the block start index instead. -/// -SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { - assert(MI && "Missing MachineInstr"); - const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; - while (I != B) - if (!(--I)->isDebugValue() && !I->isCopy()) - return Indexes->getInstructionIndex(I); - return Indexes->getMBBStartIdx(MBB); -} - -/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous -/// real non-copy instruction for each instruction in SA->UseSlots. -/// -void RAGreedy::calcPrevSlots() { - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; - PrevSlot.clear(); - PrevSlot.reserve(Uses.size()); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); - PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); - } -} - -/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may -/// be beneficial to split before UseSlots[i]. -/// -/// 0 is always a valid split point -unsigned RAGreedy::nextSplitPoint(unsigned i) { - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; - const unsigned Size = Uses.size(); - assert(i != Size && "No split points after the end"); - // Allow split before i when Uses[i] is not adjacent to the previous use. - while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) - ; - return i; -} - /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only /// basic block. /// @@ -981,11 +1157,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, dbgs() << '\n'; }); - // For every use, find the previous mapped non-copy instruction. - // We use this to detect valid split points, and to estimate new interval - // sizes. - calcPrevSlots(); + // Since we allow local split results to be split again, there is a risk of + // creating infinite loops. It is tempting to require that the new live + // ranges have fewer instructions than the original. That would guarantee + // convergence, but it is too strict. A live range with 3 instructions can be + // split 2+3 (including the COPY), and we want to allow that. + // + // Instead we use these rules: + // + // 1.
Allow any split for ranges with getStage() < RS_Local. (Except for the + // noop split, of course). + // 2. Require progress be made for ranges with getStage() >= RS_Local. All + // the new ranges must have fewer instructions than before the split. + // 3. New ranges with the same number of instructions are marked RS_Local, + // smaller ranges are marked RS_New. + // + // These rules allow a 3 -> 2+3 split once, which we need. They also prevent + // excessive splitting and infinite loops. + // + bool ProgressRequired = getStage(VirtReg) >= RS_Local; + // Best split candidate. unsigned BestBefore = NumGaps; unsigned BestAfter = 0; float BestDiff = 0; @@ -1003,13 +1195,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // The new spill weight must be larger than any gap interference. // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. - unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + unsigned SplitBefore = 0, SplitAfter = 1; // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (unsigned i = 1; i != SplitAfter; ++i) - MaxGap = std::max(MaxGap, GapWeight[i]); for (;;) { // Live before/after split? @@ -1027,41 +1217,31 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } // Should the interval be extended or shrunk? bool Shrink = true; - if (MaxGap < HUGE_VALF) { - // Estimate the new spill weight. - // - // Each instruction reads and writes the register, except the first - // instr doesn't read when !FirstLive, and the last instr doesn't write - // when !LastLive. - // - // We will be inserting copies before and after, so the total number of - // reads and writes is 2 * EstUses. - // - const unsigned EstUses = 2*(SplitAfter - SplitBefore) + - 2*(LiveBefore + LiveAfter); - // Try to guess the size of the new interval. This should be trivial, - // but the slot index of an inserted copy can be a lot smaller than the - // instruction it is inserted before if there are many dead indexes - // between them. - // - // We measure the distance from the instruction before SplitBefore to - // get a conservative estimate. - // - // The final distance can still be different if inserting copies - // triggers a slot index renumbering. + // How many gaps would the new range have? + unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; + + // Legally, without causing looping? + bool Legal = !ProgressRequired || NewGaps < NumGaps; + + if (Legal && MaxGap < HUGE_VALF) { + // Estimate the new spill weight. Each instruction reads or writes the + // register. Conservatively assume there are no read-modify-write + // instructions. // - const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, - PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Try to guess the size of the new interval. + const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter)*SlotIndex::InstrDist); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. 
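The progress rules above are compact enough to distill; a hedged sketch of the legality test as a hypothetical free function, with variable names borrowed from tryLocalSplit:

    // The candidate split keeps uses [SplitBefore, SplitAfter] out of a range
    // with NumGaps gaps between uses; LiveBefore/LiveAfter say whether the
    // register is live across the edges where copies will be inserted.
    static bool isLegalLocalSplit(bool ProgressRequired, unsigned NumGaps,
                                  bool LiveBefore, bool LiveAfter,
                                  unsigned SplitBefore, unsigned SplitAfter) {
      // Gaps the new range would have, counting the inserted copies.
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
      // Ranges at RS_Local or later must strictly shrink; this is what allows
      // the 3 -> 2+3 split exactly once without risking an infinite loop.
      return !ProgressRequired || NewGaps < NumGaps;
    }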
- float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1070,8 +1250,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Try to shrink. if (Shrink) { - SplitBefore = nextSplitPoint(SplitBefore); - if (SplitBefore < SplitAfter) { + if (++SplitBefore < SplitAfter) { DEBUG(dbgs() << " shrink\n"); // Recompute the max when necessary. if (GapWeight[SplitBefore - 1] >= MaxGap) { @@ -1091,10 +1270,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, } DEBUG(dbgs() << " extend\n"); - for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; - SplitAfter != e; ++SplitAfter) - MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); - continue; + MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); } } @@ -1113,9 +1289,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); SE->useIntv(SegStart, SegStop); - SE->closeIntv(); - SE->finish(); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + + // If the new range has the same number of instructions as before, mark it as + // RS_Local so the next split will be forced to make progress. Otherwise, + // leave the new intervals as RS_New so they can compete. + bool LiveBefore = BestBefore != 0 || BI.LiveIn; + bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; + unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; + if (NewGaps >= NumGaps) { + DEBUG(dbgs() << "Tagging non-progress ranges: "); + assert(!ProgressRequired && "Didn't make progress when it was required."); + LRStage.resize(MRI->getNumVirtRegs()); + for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) + if (IntvMap[i] == 1) { + LRStage[LREdit.get(i)->reg] = RS_Local; + DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + } + DEBUG(dbgs() << '\n'); + } ++NumLocalSplits; return 0; @@ -1141,30 +1335,36 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Don't iterate global splitting. // Move straight to spilling if this range was produced by a global split. - LiveRangeStage Stage = getStage(VirtReg); - if (Stage >= RS_Block) + if (getStage(VirtReg) >= RS_Global) return 0; SA->analyze(&VirtReg); - // First try to split around a region spanning multiple blocks. - if (Stage < RS_Region) { - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) + // FIXME: SplitAnalysis may repair broken live ranges coming from the + // coalescer. That may cause the range to become allocatable which means that + // tryRegionSplit won't be making progress. This check should be replaced with + // an assertion when the coalescer is fixed. + if (SA->didRepairRange()) { + // VirtReg has changed, so all cached queries are invalid. + invalidateVirtRegs(); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } + // First try to split around a region spanning multiple blocks. 
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + // Then isolate blocks with multiple uses. - if (Stage < RS_Block) { - SplitAnalysis::BlockPtrSet Blocks; - if (SA->getMultiUseBlocks(Blocks)) { - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); - SE->reset(LREdit); - SE->splitSingleBlocks(Blocks); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block); - if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); - } + SplitAnalysis::BlockPtrSet Blocks; + if (SA->getMultiUseBlocks(Blocks)) { + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); } // Don't assign any physregs. @@ -1179,21 +1379,25 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; + LiveRangeStage Stage = getStage(VirtReg); + DEBUG(dbgs() << StageName[Stage] << '\n'); + + // Try to evict a less worthy live range, but only for ranges from the primary + // queue. The RS_Second ranges already failed to do this, and they should not + // get a second chance until they have been split. + if (Stage != RS_Second) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - LiveRangeStage Stage = getStage(VirtReg); if (Stage == RS_First) { LRStage[VirtReg.reg] = RS_Second; DEBUG(dbgs() << "wait for second round\n"); @@ -1201,7 +1405,10 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return 0; } - assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // If we couldn't allocate a register from spilling, there is probably some + // invalid inline assembly. The base class will report it. + if (Stage >= RS_Spill) + return ~0u; // Try splitting VirtReg or interferences.
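Taken together, the hunks above turn selectOrSplit into a short back-off ladder over the live-range stages. A simplified, hypothetical mirror of the gates (the enum order is inferred from this patch, not quoted from it):

    // Hedged sketch, not the real member function; it only names the checks.
    enum LiveRangeStage { RS_New, RS_First, RS_Second, RS_Global, RS_Local,
                          RS_Spill };

    // RS_Second ranges already lost an eviction round; split before retrying.
    static bool mayTryEviction(LiveRangeStage S) { return S != RS_Second; }

    // RS_First ranges wait for the second round so smaller ranges go first.
    static bool deferToSecondRound(LiveRangeStage S) { return S == RS_First; }

    // Past RS_Spill nothing else can help; return ~0u and let the base class
    // report the failure (probably invalid inline assembly).
    static bool shouldBailOut(LiveRangeStage S) { return S >= RS_Spill; }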
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1234,12 +1441,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); - ReservedRegs = TRI->getReservedRegs(*MF); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); LoopRanges = &getAnalysis<MachineLoopRanges>(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); + DebugVars = &getAnalysis<LiveDebugVariables>(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); @@ -1258,7 +1465,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { } // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + DebugVars->emitDebugValues(VRM); // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index ef78949..0818034 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -16,6 +16,7 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterClassInfo.h" #include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -40,7 +41,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> #include <queue> #include <memory> #include <cmath> @@ -67,6 +67,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt<bool> +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -110,6 +115,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; @@ -143,6 +149,7 @@ namespace { BitVector reservedRegs_; LiveIntervals* li_; MachineLoopInfo *loopInfo; + RegisterClassInfo RegClassInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is used for backtracking. @@ -180,6 +187,9 @@ namespace { SmallVector<unsigned, 4> RecentRegs; SmallVector<unsigned, 4>::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -227,8 +237,8 @@ namespace { // Determine if we skip this register due to its being recently used. bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -358,13 +368,10 @@ namespace { /// getFirstNonReservedPhysReg - return the first non-reserved physical /// register in the register class.
unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); - TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); - while (i != aoe && reservedRegs_.test(*i)) - ++i; - assert(i != aoe && "All registers reserved?!"); - return *i; - } + ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); + assert(!O.empty() && "All registers reserved?!"); + return O.front(); + } void ComputeRelatedRegClasses(); @@ -516,6 +523,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); loopInfo = &getAnalysis<MachineLoopInfo>(); + RegClassInfo.runOnMachineFunction(fn); // We don't run the coalescer here because we have no reason to // interact with it. If the coalescer requires interaction, it @@ -792,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights, // register class we are trying to allocate. Then add the weight to all // sub-registers of the super-register even if they are not aliases. // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be choosen + // bl should get the same spill weight otherwise it will be chosen // as a spill candidate since spilling bh doesn't make ebx available. for (unsigned i = 0, e = Supers.size(); i != e; ++i) { for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) @@ -1116,6 +1124,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1152,14 +1166,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { bool Found = false; std::vector<std::pair<unsigned,float> > RegsWeights; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; float regWeight = SpillWeights[reg]; - // Don't even consider reserved regs. - if (reservedRegs_.test(reg)) - continue; // Skip recently allocated registers and reserved registers. if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; @@ -1168,11 +1179,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // If we didn't find a register that is spillable, try aliases? if (!Found) { - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; - if (reservedRegs_.test(reg)) - continue; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. 
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1432,13 +1440,13 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); - TargetRegisterClass::iterator I, E; - tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_); - assert(I != E && "No allocatable register in this register class!"); + ArrayRef<unsigned> Order = tri_->getRawAllocationOrder(RC, Hint.first, + physReg, *mf_); + assert(!Order.empty() && "No allocatable register in this register class!"); // Scan for the first available register. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1446,7 +1454,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. - if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1468,8 +1476,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be // reevaluated now. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1477,7 +1485,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1528,12 +1537,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 1e1f1e0..605507f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -222,10 +222,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. 
typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), - aoEnd = trc->allocation_order_end(*mf); - aoItr != aoEnd; ++aoItr) { - unsigned preg = *aoItr; + ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + for (unsigned i = 0; i != rawOrder.size(); ++i) { + unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { vrAllowed.push_back(preg); } @@ -581,7 +580,7 @@ void RegAllocPBQP::finalizeAlloc() const { if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = *liRC->allocation_order_begin(*mf); + physReg = liRC->getRawAllocationOrder(*mf).front(); } vrm->assignVirt2Phys(li->reg, physReg); diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp new file mode 100644 index 0000000..5a77e47 --- /dev/null +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -0,0 +1,112 @@ +//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterClassInfo class which provides dynamic +// information about target register classes. Callee saved and reserved +// registers depend on calling conventions and other dynamic information, so +// some things cannot be determined statically. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) +{} + +void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { + bool Update = false; + MF = &mf; + + // Allocate new array the first time we see a new target. + if (MF->getTarget().getRegisterInfo() != TRI) { + TRI = MF->getTarget().getRegisterInfo(); + RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + Update = true; + } + + // Does this MF have different CSRs? + const unsigned *CSR = TRI->getCalleeSavedRegs(MF); + if (Update || CSR != CalleeSaved) { + // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + // overlapping CSR. + CSRNum.clear(); + CSRNum.resize(TRI->getNumRegs(), 0); + for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) + for (const unsigned *AS = TRI->getOverlaps(Reg); + unsigned Alias = *AS; ++AS) + CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + Update = true; + } + CalleeSaved = CSR; + + // Different reserved registers? + BitVector RR = TRI->getReservedRegs(*MF); + if (RR != Reserved) + Update = true; + Reserved = RR; + + // Invalidate cached information from previous function. + if (Update) + ++Tag; +} + +/// compute - Compute the preferred allocation order for RC with reserved +/// registers filtered out. Volatile registers come first followed by CSR +/// aliases ordered according to the CSR order specified by the target. +void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + RCInfo &RCI = RegClass[RC->getID()]; + + // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs(); + + if (!RCI.Order) + RCI.Order.reset(new unsigned[NumRegs]); + + unsigned N = 0; + SmallVector<unsigned, 16> CSRAlias; + + // FIXME: Once targets reserve registers instead of removing them from the + // allocation order, we can simply use begin/end here. + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; + // Remove reserved registers from the allocation order. + if (Reserved.test(PhysReg)) + continue; + if (CSRNum[PhysReg]) + // PhysReg aliases a CSR, save it for later. + CSRAlias.push_back(PhysReg); + else + RCI.Order[N++] = PhysReg; + } + RCI.NumRegs = N + CSRAlias.size(); + assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); + + // CSR aliases go after the volatile registers, preserve the target's order. + std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + + DEBUG({ + dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; + for (unsigned I = 0; I != RCI.NumRegs; ++I) + dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); + dbgs() << " ]\n"; + }); + + // RCI is now up-to-date. + RCI.Tag = Tag; +} + diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h new file mode 100644 index 0000000..6f7d9c9 --- /dev/null +++ b/lib/CodeGen/RegisterClassInfo.h @@ -0,0 +1,121 @@ +//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterClassInfo class which provides dynamic +// information about target register classes. Callee saved and reserved +// registers depend on calling conventions and other dynamic information, so +// some things cannot be determined statically. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H +#define LLVM_CODEGEN_REGISTERCLASSINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + +class RegisterClassInfo { + struct RCInfo { + unsigned Tag; + unsigned NumRegs; + OwningArrayPtr<unsigned> Order; + + RCInfo() : Tag(0), NumRegs(0) {} + operator ArrayRef<unsigned>() const { + return ArrayRef<unsigned>(Order.get(), NumRegs); + } + }; + + // Brief cached information for each register class. + OwningArrayPtr<RCInfo> RegClass; + + // Tag changes whenever cached information needs to be recomputed. An RCInfo + // entry is valid when its tag matches. + unsigned Tag; + + const MachineFunction *MF; + const TargetRegisterInfo *TRI; + + // Callee saved registers of last MF. Assumed to be valid until the next + // runOnMachineFunction() call. + const unsigned *CalleeSaved; + + // Map register number to CalleeSaved index + 1. + SmallVector<uint8_t, 4> CSRNum; + + // Reserved registers in the current MF. + BitVector Reserved; + + // Compute all information about RC. + void compute(const TargetRegisterClass *RC) const; + + // Return an up-to-date RCInfo for RC.
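The ordering policy compute() implements above is worth seeing on its own: reserved registers are dropped, volatiles keep their raw order, and CSR aliases are appended last (the get() accessor that follows consults the Tag stamp to decide when to rerun it). A hedged sketch with plain std containers and caller-supplied predicates standing in for the Reserved BitVector and CSRNum map; none of these names are LLVM API:

    #include <functional>
    #include <vector>

    static std::vector<unsigned>
    computeOrder(const std::vector<unsigned> &RawOrder,
                 const std::function<bool(unsigned)> &IsReserved,
                 const std::function<bool(unsigned)> &AliasesCSR) {
      std::vector<unsigned> Order, CSRAlias;
      for (unsigned Reg : RawOrder) {
        if (IsReserved(Reg))
          continue;                  // never hand out reserved registers
        if (AliasesCSR(Reg))
          CSRAlias.push_back(Reg);   // deferred: using it costs a callee save
        else
          Order.push_back(Reg);      // volatile, preferred first
      }
      Order.insert(Order.end(), CSRAlias.begin(), CSRAlias.end());
      return Order;
    }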
+ const RCInfo &get(const TargetRegisterClass *RC) const { + const RCInfo &RCI = RegClass[RC->getID()]; + if (Tag != RCI.Tag) + compute(RC); + return RCI; + } + +public: + RegisterClassInfo(); + + /// runOnMachineFunction - Prepare to answer questions about MF. This must be called + /// before any other methods are used. + void runOnMachineFunction(const MachineFunction &MF); + + /// getNumAllocatableRegs - Returns the number of actually allocatable + /// registers in RC in the current function. + unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const { + return get(RC).NumRegs; + } + + /// getOrder - Returns the preferred allocation order for RC. The order + /// contains no reserved registers, and registers that alias callee saved + /// registers come last. + ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const { + return get(RC); + } + + /// getLastCalleeSavedAlias - Returns the last callee saved register that + /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR. + unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + if (unsigned N = CSRNum[PhysReg]) + return CalleeSaved[N-1]; + return 0; + } + + /// isReserved - Returns true when PhysReg is a reserved register. + /// + /// Reserved registers may belong to an allocatable register class, but the + /// target has explicitly requested that they are not used. + /// + bool isReserved(unsigned PhysReg) const { + return Reserved.test(PhysReg); + } + + /// isAllocatable - Returns true when PhysReg belongs to an allocatable + /// register class and it hasn't been reserved. + /// + /// Allocatable registers may show up in the allocation order of some virtual + /// register, so a register allocator needs to track its liveness and + /// availability. + bool isAllocatable(unsigned PhysReg) const { + return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg); + } +}; +} // end namespace llvm + +#endif + diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index ebfe533..9e9a145 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -154,13 +154,16 @@ void RegScavenger::forward() { BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { + // Ignore undef uses. + if (MO.isUndef()) + continue; // Two-address operands implicitly kill. if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) addRegWithSubRegs(KillRegs, Reg); @@ -178,12 +181,14 @@ void RegScavenger::forward() { // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || isReserved(Reg)) continue; if (MO.isUse()) { + if (MO.isUndef()) + continue; if (!isUsed(Reg)) { // Check if it's partial live: e.g.
// D0 = insert_subreg D0<undef>, S0 diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index cbfd5a2..8b02ec4 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix", static cl::opt<std::string> machineFuncsToRender("rmf-funcs", - cl::desc("Coma seperated list of functions to render" + cl::desc("Comma separated list of functions to render" ", or \"*\"."), cl::init(""), cl::Hidden); @@ -434,8 +434,7 @@ namespace llvm { rcEnd = tri->regclass_end(); rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - unsigned capacity = std::distance(trc->allocation_order_begin(*mf), - trc->allocation_order_end(*mf)); + unsigned capacity = trc->getRawAllocationOrder(*mf).size(); if (capacity != 0) capacityMap[trc] = capacity; @@ -482,8 +481,7 @@ namespace llvm { rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - if (trc->allocation_order_begin(*mf) == - trc->allocation_order_end(*mf)) + if (trc->getRawAllocationOrder(*mf).empty()) continue; unsigned worstAtI = getWorst(li->reg, trc); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3388889..f328493 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,17 +19,27 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; +#ifndef NDEBUG +cl::opt<bool> StressSchedOpt( + "stress-sched", cl::Hidden, cl::init(false), + cl::desc("Stress test instruction scheduling")); +#endif + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), TRI(TM.getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { +#ifndef NDEBUG + StressSched = StressSchedOpt; +#endif } ScheduleDAG::~ScheduleDAG() {} @@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << G->TRI->getName(I->getReg()); dbgs() << "\n"; } } @@ -472,7 +484,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } -/// AddPred - Updates the topological ordering to accomodate an edge +/// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; @@ -490,7 +502,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { } } -/// RemovePred - Updates the topological ordering to accomodate an +/// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. 
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f17023e..2363df4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -35,8 +35,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineDominatorTree &mdt) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { - DbgValueVec.clear(); + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), + LoopRegs(MLI, MDT), FirstDbgValue(0) { + DbgValues.clear(); } /// Run - perform scheduling. @@ -120,7 +121,7 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, // such aliases. if (PSV->isAliased(MFI)) return 0; - + MayAlias = PSV->mayAlias(MFI); return V; } @@ -174,7 +175,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { + E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (Seen.insert(Reg)) Uses[Reg].push_back(&ExitSU); @@ -200,11 +201,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; - // Keep track of dangling debug references to registers. - std::vector<std::pair<MachineInstr*, unsigned> > - DanglingDebugValue(TRI->getNumRegs(), - std::make_pair(static_cast<MachineInstr*>(0), 0)); - // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -214,26 +210,32 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. - DbgValueVec.clear(); + DbgValues.clear(); + FirstDbgValue = NULL; // Model data dependencies between instructions being scheduled and the // ExitSU. AddSchedBarrierDeps(); + for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { + assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); + } + // Walk the list of instructions, from bottom moving up. + MachineInstr *PrevMI = NULL; for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); - // DBG_VALUE does not have SUnit's built, so just remember these for later - // reinsertion. 
+ if (MI && PrevMI) { + DbgValues.push_back(std::make_pair(PrevMI, MI)); + PrevMI = NULL; + } + if (MI->isDebugValue()) { - if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() && - MI->getOperand(0).getReg() - DanglingDebugValue[MI->getOperand(0).getReg()] = - std::make_pair(MI, DbgValueVec.size()); - DbgValueVec.push_back(MI); + PrevMI = MI; continue; } + const TargetInstrDesc &TID = MI->getDesc(); assert(!TID.isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); @@ -257,13 +259,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - if (MO.isDef() && DanglingDebugValue[Reg].first!=0) { - SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first); - DbgValueVec[DanglingDebugValue[Reg].second] = 0; - DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0); - } - std::vector<SUnit *> &UseList = Uses[Reg]; + // Defs are pushed in the order they are visited and never reordered. std::vector<SUnit *> &DefList = Defs[Reg]; // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -283,9 +280,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &DefList = Defs[*Alias]; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + std::vector<SUnit *> &MemDefList = Defs[*Alias]; + for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) { + SUnit *DefSU = MemDefList[i]; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -371,7 +368,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // will be overlapped by work done outside the current // scheduling region. Latency -= std::min(Latency, Count); - // Add the artifical edge. + // Add the artificial edge. ExitSU.addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, @@ -393,6 +390,16 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { UseList.clear(); if (!MO.isDead()) DefList.clear(); + + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + if (SU->isCall) { + while (!DefList.empty() && DefList.back()->isCall) + DefList.pop_back(); + } DefList.push_back(SU); } else { UseList.push_back(SU); @@ -411,11 +418,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; if (TID.isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && + (MI->hasVolatileMemoryRef() && (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. - for (std::map<const Value *, SUnit *>::iterator I = + for (std::map<const Value *, SUnit *>::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } @@ -458,9 +465,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is // an existing def. - std::map<const Value *, SUnit *>::iterator I = + std::map<const Value *, SUnit *>::iterator I = ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map<const Value *, SUnit *>::iterator IE = + std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, @@ -513,39 +520,41 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - if (const Value *V = + if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A load from a specific PseudoSourceValue. Add precise dependencies. - std::map<const Value *, SUnit *>::iterator I = + std::map<const Value *, SUnit *>::iterator I = ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map<const Value *, SUnit *>::iterator IE = + std::map<const Value *, SUnit *>::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/true)); if (MayAlias) AliasMemUses[V].push_back(SU); - else + else NonAliasMemUses[V].push_back(SU); } else { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (std::map<const Value *, SUnit *>::iterator I = + for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - + PendingLoads.push_back(SU); MayAlias = true; } - + // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } + } } } + if (PrevMI) + FirstDbgValue = PrevMI; for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { Defs[i].clear(); @@ -572,11 +581,11 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; - + // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; @@ -655,39 +664,33 @@ MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { BB->remove(I); } - // First reinsert any remaining debug_values; these are either constants, - // or refer to live-in registers. The beginning of the block is the right - // place for the latter. The former might reasonably be placed elsewhere - // using some kind of ordering algorithm, but right now it doesn't matter. - for (int i = DbgValueVec.size()-1; i>=0; --i) - if (DbgValueVec[i]) - BB->insert(InsertPos, DbgValueVec[i]); + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->insert(InsertPos, FirstDbgValue); // Then re-insert them according to the given schedule. for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - SUnit *SU = Sequence[i]; - if (!SU) { + if (SUnit *SU = Sequence[i]) + BB->insert(InsertPos, SU->getInstr()); + else // Null SUnit* is a noop. 
EmitNoop(); - continue; - } - - BB->insert(InsertPos, SU->getInstr()); - for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) - BB->insert(InsertPos, SU->DbgInstrList[i]); } // Update the Begin iterator, as the first instruction in the block // may have been scheduled later. - if (!DbgValueVec.empty()) { - for (int i = DbgValueVec.size()-1; i>=0; --i) - if (DbgValueVec[i]!=0) { - Begin = DbgValueVec[DbgValueVec.size()-1]; - break; - } - } else if (!Sequence.empty()) + if (!Sequence.empty()) Begin = Sequence[0]->getInstr(); - DbgValueVec.clear(); + // Reinsert any remaining debug_values. + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineInstr *OrigPrevMI = P.second; + BB->insertAfter(OrigPrevMI, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; return BB; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c878287..8a4ea85 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -110,10 +110,6 @@ namespace llvm { std::vector<std::vector<SUnit *> > Defs; std::vector<std::vector<SUnit *> > Uses; - /// DbgValueVec - Remember DBG_VALUEs that refer to a particular - /// register. - std::vector<MachineInstr *>DbgValueVec; - /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here /// to minimize construction/destruction. @@ -128,6 +124,14 @@ namespace llvm { /// SmallSet<unsigned, 8> LoopLiveInRegs; + protected: + + /// DbgValues - Remember instruction that precedes DBG_VALUE. + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + DbgValueVector DbgValues; + MachineInstr *FirstDbgValue; + public: MachineBasicBlock::iterator Begin; // The beginning of the range to // be scheduled. The range extends diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a9afec8..4ac590a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. @@ -165,6 +169,8 @@ namespace { SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSMULO(SDNode *N); + SDValue visitUMULO(SDNode *N); SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); @@ -529,7 +535,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, cast<ConstantSDNode>(N0.getOperand(1)), cast<ConstantSDNode>(N1)); return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } else if (N0.hasOneUse()) { + } + if (N0.hasOneUse()) { // reassoc.
(op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, N0.getOperand(0), N1); @@ -546,7 +553,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, cast<ConstantSDNode>(N1.getOperand(1)), cast<ConstantSDNode>(N0)); return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } else if (N1.hasOneUse()) { + } + if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, N1.getOperand(0), N0); @@ -990,6 +998,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); + + // Transfer debug value. + DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -1045,6 +1056,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SMULO: return visitSMULO(N); + case ISD::UMULO: return visitUMULO(N); case ISD::SDIVREM: return visitSDIVREM(N); case ISD::UDIVREM: return visitUDIVREM(N); case ISD::AND: return visitAND(N); @@ -1566,7 +1579,8 @@ static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) { return DAG.getConstant(0, VT); - } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // Produce a vector of zeros. SDValue El = DAG.getConstant(0, VT.getVectorElementType()); std::vector<SDValue> Ops(VT.getVectorNumElements(), El); @@ -2174,6 +2188,26 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSMULO(SDNode *N) { + // (smulo x, 2) -> (saddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitUMULO(SDNode *N) { + // (umulo x, 2) -> (uaddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitSDIVREM(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); if (Res.getNode()) return Res; @@ -3000,6 +3034,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -3062,26 +3099,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or - // (srl (and x, (shl -1, c1)), (sub c1, c2)) + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or + // (and (srl x, (sub c1, c2)), MASK) if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { uint64_t c2 = 
N1C->getZExtValue(); - SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1), - VT); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, - N0.getOperand(0), - HiBitsMask); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2-c1); + Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c2-c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1-c2); + Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1-c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift, + DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) @@ -3323,8 +3361,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); } @@ -3663,6 +3703,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3751,27 +3813,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -3799,6 +3842,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -3882,7 +3964,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3957,27 +4039,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, - N->getDebugLoc(), VT, SOp)); - } + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4012,7 +4115,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); - if (VT.getSizeInBits() == N0VT.getSizeInBits()) { + if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. 
If so, @@ -4024,25 +4127,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &OneOps[0], OneOps.size())); - } else { - // If the desired elements are smaller or larger than the source - // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &OneOps[0], OneOps.size())); - } + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -4110,7 +4212,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4166,27 +4268,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -6265,6 +6348,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isNonTemporal(), OrigAlign); } + // Turn 'store undef, Ptr' -> nothing. 
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { // NOTE: If the original store is volatile, this transform must not increase @@ -6298,8 +6385,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); - } else if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. @@ -6393,8 +6482,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits(Value, - APInt::getLowBitsSet(Value.getValueSizeInBits(), - ST->getMemoryVT().getSizeInBits())); + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, @@ -6486,18 +6576,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - // Check if the result type doesn't match the inserted element type. A - // SCALAR_TO_VECTOR may truncate the inserted element and the - // EXTRACT_VECTOR_ELT may widen the extracted vector. - SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); - if (InOp.getValueType() != NVT) { - assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); - } - return InOp; - } + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + SDValue InOp = InVec.getOperand(0); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } + return InOp; + } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. @@ -6558,7 +6648,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. 
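// --- A standalone sketch, not part of the patch: the
// 'store float 1.0, Ptr' -> 'store int ..., Ptr' transform above only
// re-labels the bits being stored. This is the value-level bitcast it
// performs; 0x3F800000 is the IEEE-754 single-precision pattern for 1.0f.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const float F = 1.0f;
  uint32_t Bits;
  assert(sizeof(F) == sizeof(Bits) && "expected a 32-bit float");
  std::memcpy(&Bits, &F, sizeof(Bits));  // what ISD::BITCAST does to the value
  assert(Bits == 0x3F800000u);
  return 0;
}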
@@ -7497,18 +7587,17 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, SrcValueAlign = LD->getOriginalAlignment(); TBAAInfo = LD->getTBAAInfo(); return true; - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); SrcValueAlign = ST->getOriginalAlignment(); TBAAInfo = ST->getTBAAInfo(); - } else { - llvm_unreachable("FindAliasInfo expected a memory operand"); + return false; } - - return false; + llvm_unreachable("FindAliasInfo expected a memory operand"); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, @@ -7621,13 +7710,13 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); - if (Aliases.size() == 0) { - // If no operands then chain to entry token. + // If no operands then chain to entry token. + if (Aliases.size() == 0) return DAG.getEntryNode(); - } else if (Aliases.size() == 1) { - // If a single operand then chain to it. We don't need to revisit it. + + // If a single operand then chain to it. We don't need to revisit it. + if (Aliases.size() == 1) return Aliases[0]; - } // Construct a custom tailored token factor. return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ea8ace3..797f174 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,8 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -109,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) { // of whether FastISel can handle them. MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { - // Promote MVT::i1 to a legal type though, because it's common and easy. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; @@ -121,10 +123,9 @@ unsigned FastISel::getRegForValue(const Value *V) { // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) { - unsigned Reg = I->second; - return Reg; - } + if (I != FuncInfo.ValueMap.end()) + return I->second; + unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; @@ -164,8 +165,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + if (CF->isNullValue()) { + Reg = TargetMaterializeFloatZero(CF); + } else { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + } if (!Reg) { // Try to emit the constant by using an integer constant with a cast. 
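// --- A standalone sketch, not part of the patch: SelectBinaryOp below turns
// "sdiv exact X, 2^k" into "sra X, k". That is only valid because the 'exact'
// flag promises a zero remainder; for inexact negative dividends the two
// disagree (-7 / 8 is 0, but -7 >> 3 is -1). Assumes the host's signed >>
// is arithmetic, an assumption LLVM already makes elsewhere.
#include <cassert>

int main() {
  for (int q = -1024; q <= 1024; ++q) {
    const int X = q * 8;              // only exact multiples, per the flag
    assert(X / 8 == (X >> 3));        // sdiv exact X, 8  ==  sra X, 3
  }
  return 0;
}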
@@ -230,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { /// NOTE: This is only necessary because we might select a block that uses /// a value before we select the block that defines the value. It might be /// possible to fix this by selecting blocks in reverse postorder. -unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { +void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; - return Reg; + return; } unsigned &AssignedReg = FuncInfo.ValueMap[I]; @@ -242,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { AssignedReg = Reg; else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. - FuncInfo.RegFixups[AssignedReg] = Reg; + for (unsigned i = 0; i < NumRegs; i++) + FuncInfo.RegFixups[AssignedReg+i] = Reg+i; AssignedReg = Reg; } - - return AssignedReg; } std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { @@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { return false; } + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, - CI->getZExtValue()); - if (ResultReg != 0) { - // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); - return true; + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; } // Check if the second operand is a constant float. @@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) { } bool FastISel::SelectCall(const User *I) { - const Function *F = cast<CallInst>(I)->getCalledFunction(); + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) { + // Don't attempt to handle constraints. 
+ if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. - unsigned IID = F->getIntrinsicID(); - switch (IID) { + switch (F->getIntrinsicID()) { default: break; case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) return true; @@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast<DbgValueInst>(I); + const DbgValueInst *DI = cast<DbgValueInst>(Call); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { @@ -523,65 +575,68 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { - default: break; - case TargetLowering::Expand: { - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(I, ResultReg); - return true; - } - } - break; + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) + break; + + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(Call, ResultReg); + return true; } case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { - default: break; - case TargetLowering::Expand: { - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) + break; + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(Call); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(I); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } - UpdateValueMap(I, ResultReg); + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(Call); + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; - return true; - } - } - break; + UpdateValueMap(Call, ResultReg); + + return true; + } + case Intrinsic::objectsize: { + ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); + unsigned long long Res = CI->isZero() ? -1ULL : 0; + Constant *ResCI = ConstantInt::get(Call->getType(), Res); + unsigned ResultReg = getRegForValue(ResCI); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; } } @@ -598,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { // Unhandled type. Halt "fast" selection and bail. return false; - // Check if the destination type is legal. Or as a special case, - // it may be i1 if we're doing a truncate because that's - // easy and somewhat common. + // Check if the destination type is legal. if (!TLI.isTypeLegal(DstVT)) - if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) - // Unhandled type. Halt "fast" selection and bail. - return false; + return false; - // Check if the source operand is legal. Or as a special case, - // it may be i1 if we're doing zero-extension because that's - // easy and somewhat common. + // Check if the source operand is legal. if (!TLI.isTypeLegal(SrcVT)) - if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) - // Unhandled type. Halt "fast" selection and bail. - return false; + return false; unsigned InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) @@ -621,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - // If the operand is i1, arrange for the high bits in the register to be zero. - if (SrcVT == MVT::i1) { - SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); - InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); - if (!InputReg) - return false; - InputRegIsKill = true; - } - // If the result is i1, truncate to the target's type for i1 first. 
- if (DstVT == MVT::i1) - DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); - unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, @@ -784,6 +819,47 @@ FastISel::SelectFNeg(const User *I) { } bool +FastISel::SelectExtractValue(const User *U) { + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); + if (!EVI) + return false; + + // Make sure we only try to handle extracts with a legal result. But also + // allow i1 because it's easy. + EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); + if (!RealVT.isSimple()) + return false; + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + return false; + + const Value *Op0 = EVI->getOperand(0); + const Type *AggTy = Op0->getType(); + + // Get the base result register. + unsigned ResultReg; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + if (I != FuncInfo.ValueMap.end()) + ResultReg = I->second; + else if (isa<Instruction>(Op0)) + ResultReg = FuncInfo.InitializeRegForValue(Op0); + else + return false; // fast-isel can't handle aggregate constants at the moment + + // Get the actual result register, which is an offset from the base register. + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + + for (unsigned i = 0; i < VTIndex; i++) + ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); + + UpdateValueMap(EVI, ResultReg); + return true; +} + +bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: @@ -887,6 +963,9 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { return true; } + case Instruction::ExtractValue: + return SelectExtractValue(I); + case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -966,59 +1045,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { + // If this is a multiply by a power of two, emit this as a shift left. + if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { + Opcode = ISD::SHL; + Imm = Log2_64(Imm); + } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { + // div x, 8 -> srl x, 3 + Opcode = ISD::SRL; + Imm = Log2_64(Imm); + } + + // Horrible hack (to be removed), check to make sure shift amounts are + // in-range. + if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && + Imm >= VT.getSizeInBits()) + return 0; + // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) - return 0; - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); -} - -/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with a floating-point immediate operand using -/// FastEmit_rf. If that fails, it materializes the immediate into a register -/// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType) { - // First check if immediate type is legal. If not, we can't use the rf form. 
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); - if (ResultReg != 0) - return ResultReg; - - // Materialize the constant in a register. - unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); if (MaterialReg == 0) { - // If the target doesn't have a way to directly enter a floating-point - // value into a register, use an alternate approach. - // TODO: The current approach only supports floating-point constants - // that can be constructed by conversion from integer values. This should - // be replaced by code that creates a load from a constant-pool entry, - // which will require some target-specific work. - const APFloat &Flt = FPImm->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); - - uint64_t x[2]; - uint32_t IntBitWidth = IntVT.getSizeInBits(); - bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); - if (!isExact) - return 0; - APInt IntVal(IntBitWidth, 2, x); - - unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::Constant, IntVal.getZExtValue()); - if (IntegerReg == 0) - return 0; - MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); - if (MaterialReg == 0) - return 0; + // This is a bit ugly/slow, but failing here means falling out of + // fast-isel, which would be very slow. + const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1078,6 +1131,30 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -1183,6 +1260,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -1238,7 +1332,7 @@ bool 
FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't + // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d8a5770..d518b5d 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -54,25 +54,6 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -/// isOnlyUsedInEntryBlock - If the specified argument is only used in the -/// entry block, return true. This includes arguments used by switches, since -/// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { - // With FastISel active, we may be splitting blocks, so force creation - // of virtual registers for all non-dead arguments. - if (EnableFastISel) - return A->use_empty(); - - const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; - if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) - return false; // Use not in entry block. - } - return true; -} - FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } @@ -86,16 +67,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); - CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), Outs, Fn->getContext()); - // Create a vreg for each argument register that is not dead and is used - // outside of the entry block for the function. - for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); - AI != E; ++AI) - if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) - InitializeRegForValue(AI); - // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. @@ -181,6 +156,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { const PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + DebugLoc DL = PN->getDebugLoc(); unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); @@ -343,7 +322,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { APInt Zero(BitWidth, 0); DestLOI.KnownZero = Zero; DestLOI.KnownOne = Zero; - return; + return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -375,18 +354,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// setByValArgumentFrameIndex - Record frame index for the byval /// argument. This overrides previous frame index entry for this argument, /// if any. 
-void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, +void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, int FI) { assert (A->hasByValAttr() && "Argument does not have byval attribute!"); ByValArgFrameIndexMap[A] = FI; } - + /// getByValArgumentFrameIndex - Get frame index for the byval argument. /// If the argument does not have any assigned frame index then 0 is /// returned. int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { assert (A->hasByValAttr() && "Argument does not have byval attribute!"); - DenseMap<const Argument *, int>::iterator I = + DenseMap<const Argument *, int>::iterator I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e309def..2a65d65 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; + EVT VT = Node->getValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { @@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); - + // Figure out the register class to create for the destreg. 
if (VRBase) { DstRC = MRI->getRegClass(VRBase); @@ -283,7 +288,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && "Don't have operand info for this instruction!"); - if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); - assert((NumOps & 1) == 0 && - "REG_SEQUENCE must have an even number of operands!"); + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); - if (i & 1) { + if ((i & 1) == 0) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b837261..4b6d3ef 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -14,23 +14,16 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -57,19 +50,13 @@ class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - CodeGenOpt::Level OptLevel; // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. 
- SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; + SmallVector<SDValue, 8> LastCALLSEQ; enum LegalizeAction { Legal, // The target natively supports this operation. @@ -98,13 +85,13 @@ class SelectionDAGLegalize { } public: - SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol); + explicit SelectionDAGLegalize(SelectionDAG &DAG); /// getTypeAction - Return how we should legalize values of this type, either /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. @@ -147,6 +134,9 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -158,7 +148,7 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandDivRemLibCall(SDNode *Node, bool isSigned, bool isDIV); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -184,6 +174,15 @@ private: void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } + void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } + void pushLastCALLSEQ(SDValue s) { + LastCALLSEQ.push_back(s); + } + void popLastCALLSEQ() { + LastCALLSEQ.pop_back(); + } }; } @@ -219,18 +218,16 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, - CodeGenOpt::Level ol) +SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), OptLevel(ol), + DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; + pushLastCALLSEQ(DAG.getEntryNode()); // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -258,14 +255,15 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. 
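// --- A standalone sketch, not part of the patch: with LastCALLSEQ now a
// stack, nested CALLSEQ_START/CALLSEQ_END pairs are matched like brackets,
// which is the depth rule the rewritten walk below implements. A minimal
// model over a flat opcode sequence (hypothetical enum; not the SDNode walk):
#include <cassert>
#include <vector>

enum Op { CALLSEQ_START, CALLSEQ_END, OTHER };

// Index of the CALLSEQ_END matching the CALLSEQ_START at Begin, else -1.
static int findMatchingEnd(const std::vector<Op> &Seq, unsigned Begin) {
  int Depth = 0;
  for (unsigned i = Begin; i < Seq.size(); ++i) {
    if (Seq[i] == CALLSEQ_START)
      ++Depth;
    else if (Seq[i] == CALLSEQ_END && --Depth == 0)
      return (int)i;
  }
  return -1;                          // unbalanced sequence
}

int main() {
  const Op S[] = { CALLSEQ_START, OTHER, CALLSEQ_START, OTHER,
                   CALLSEQ_END, OTHER, CALLSEQ_END };
  std::vector<Op> Seq(S, S + 7);
  assert(findMatchingEnd(Seq, 0) == 6); // outer pair spans the inner one
  assert(findMatchingEnd(Seq, 2) == 4); // inner pair matches directly
  return 0;
}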
+ int next_depth = depth; if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) + next_depth = depth + 1; + if (Node->getOpcode() == ISD::CALLSEQ_END) { + assert(depth > 0 && "negative depth!"); + if (depth == 1) return Node; + else + next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -296,7 +294,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) return Result; } return 0; @@ -317,6 +315,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { case ISD::CALLSEQ_START: if (!nested) return Node; + Node = Node->getOperand(0).getNode(); nested--; break; case ISD::CALLSEQ_END: @@ -324,7 +323,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) { break; } } - return 0; + return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -433,68 +432,67 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); - } else { - // Do a (aligned) store to a stack slot, then copy from the stack slot - // to the final destination using (unaligned) integer loads and stores. - EVT StoredVT = ST->getMemoryVT(); - EVT RegVT = - TLI.getRegisterType(*DAG.getContext(), - EVT::getIntegerVT(*DAG.getContext(), - StoredVT.getSizeInBits())); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - - // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, MachinePointerInfo(), - StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); - SmallVector<SDValue, 8> Stores; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, - MachinePointerInfo(), - false, false, 0); - // Store it to the final location. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo().getWithOffset(Offset), - ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // Increment the pointers. - Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - } + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. 
+ EVT StoredVT = ST->getMemoryVT(); + EVT RegVT = + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - // The last store may be partial. Do a truncating store. On big-endian - // machines this requires an extending load from the stack slot to ensure - // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (StoredBytes - Offset)); + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), - MemVT, false, false, 0); + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo() - .getWithOffset(Offset), - MemVT, ST->isVolatile(), - ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), + false, false, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + ST->isVolatile(), ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo() + .getWithOffset(Offset), + MemVT, ST->isVolatile(), + ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. 
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -941,11 +939,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -1034,6 +1033,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::CALLSEQ_START: { SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1050,9 +1050,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1073,25 +1073,29 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + setLastCALLSEQ(SDValue(CallEnd, 0)); // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. + // This will cause this node to be legalized as well as handling libcalls + // right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1119,9 +1123,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. 
- IsLegalizingCall = false; + popLastCALLSEQ(); // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); @@ -1371,6 +1374,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. 
Sign- and // zero-extend operations are currently folded into extending loads, // whether they are legal or not, and then we end up here without any @@ -1546,6 +1634,88 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = TLI.LowerOperation(Result, DAG); break; case Expand: + + EVT WideScalarVT = Tmp3.getValueType().getScalarType(); + EVT NarrowScalarVT = StVT.getScalarType(); + + // The Store type is illegal, must scalarize the vector store. + SmallVector<SDValue, 8> Stores; + bool ScalarLegal = isTypeLegal(WideScalarVT); + if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + unsigned NumElem = StVT.getVectorNumElements(); + + unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + // Types smaller than 8 bits are promoted to 8 bits. + ScalarSize = std::max<unsigned>(ScalarSize, 8); + // Store stride + unsigned Stride = ScalarSize/8; + assert(isPowerOf2_32(Stride) && "Stride must be a power of two"); + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + + + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); + + Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // The Store type is illegal, must scalarize the vector store. + // However, the scalar type is illegal. Must bitcast the result + // and store it in smaller parts. + if (!isTypeLegal(StVT) && StVT.isVector()) { + unsigned WideNumElem = StVT.getVectorNumElements(); + unsigned Stride = NarrowScalarVT.getSizeInBits()/8; + + unsigned SizeRatio = + (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits()); + + EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT, + SizeRatio*WideNumElem); + + // Cast the wide elem vector to wider vec with smaller elem type. + // Example <2 x i64> -> <4 x i32> + Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); + + for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) { + // Extract element i + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); + // bump pointer. + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + + // Store if this element is: + // - First element on big endian, or + // - Last element on little endian + if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) || + ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) { + SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, + ST->getPointerInfo().getWithOffset(Idx*Stride), + isVolatile, isNonTemporal, Alignment); + Stores.push_back(Store); + } + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + break; + } + + // TRUNCSTORE:i16 i32 -> STORE i16 assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); @@ -2007,7 +2177,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset.
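// --- A standalone sketch, not part of the patch: the scalarized-store path
// above sizes each per-element store by rounding odd scalar widths up to a
// power of two, with a one-byte floor. NextPowerOf2 here is a local stand-in
// for the llvm/Support/MathExtras.h helper (next power of two strictly
// greater than A).
#include <cassert>

static unsigned NextPowerOf2(unsigned A) {
  A |= (A >> 1); A |= (A >> 2); A |= (A >> 4);
  A |= (A >> 8); A |= (A >> 16);
  return A + 1;
}

static unsigned StoreStrideBytes(unsigned ScalarBits) {
  if (ScalarBits & (ScalarBits - 1))  // round odd types to the next pow2
    ScalarBits = NextPowerOf2(ScalarBits);
  if (ScalarBits < 8)                 // sub-byte types take a whole byte
    ScalarBits = 8;
  return ScalarBits / 8;
}

int main() {
  assert(StoreStrideBytes(1)  == 1);  // i1  -> one byte
  assert(StoreStrideBytes(16) == 2);  // i16 -> two bytes
  assert(StoreStrideBytes(24) == 4);  // i24 -> 32 bits -> four bytes
  assert(StoreStrideBytes(64) == 8);
  return 0;
}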
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2043,9 +2212,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2055,7 +2258,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2081,7 +2283,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2121,10 +2323,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. -SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, - bool isDIV) { +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { default: assert(0 && "Unexpected request for libcall!"); @@ -2135,17 +2336,18 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - if (!TLI.getLibcallName(LC)) - return SDValue(); + return TLI.getLibcallName(LC) != 0; +} - // Only issue divrem libcall if both quotient and remainder are needed. +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { unsigned OtherOpcode = 0; - if (isSigned) { + if (isSigned) OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - } else { + else OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - } - SDNode *OtherNode = 0; + SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), @@ -2155,32 +2357,28 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, continue; if (User->getOpcode() == OtherOpcode && User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) { - OtherNode = User; - break; - } + User->getOperand(1) == Op1) + return true; } - if (!OtherNode) - return SDValue(); + return false; +} - // If the libcall is already generated, no need to issue it again. - DenseMap<SDValue, SDValue>::iterator I - = LegalizedNodes.find(SDValue(OtherNode,0)); - if (I != LegalizedNodes.end()) { - OtherNode = I->second.getNode(); - SDNode *Chain = OtherNode->getOperand(0).getNode(); - for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; - if (User == OtherNode) - continue; - if (isDIV) { - assert(User->getOpcode() == ISD::CopyFromReg); - } else { - assert(User->getOpcode() == ISD::LOAD); - } - return SDValue(User, 0); - } +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } // The input chain to this libcall is the entry node of the function. @@ -2221,14 +2419,15 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, MachinePointerInfo(), false, false, 0); - return isDIV ? 
CallInfo.first : Rem; + Results.push_back(CallInfo.first); + Results.push_back(Rem); } /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a @@ -2878,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3204,28 +3403,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - } + } else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128); Results.push_back(Tmp1); break; } @@ -3235,26 +3431,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - } - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - } - } + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); Results.push_back(Tmp1); break; } @@ -3271,6 +3462,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3355,6 +3551,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3372,7 +3569,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3385,7 +3581,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3396,15 +3591,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
+ unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture, so each wide argument has to + // be split into two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3486,9 +3693,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { + // We test only the i1 bit. Skip the AND if UNDEF. + Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : + DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, - DAG.getCondCode(ISD::SETNE), Tmp2, - DAG.getConstant(0, Tmp2.getValueType()), + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3539,7 +3750,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3697,9 +3909,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // SelectionDAG::Legalize - This is the entry point for the file. // -void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { +void SelectionDAG::Legalize() { /// run - This is the main entry point to this class.
/// - SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); + SelectionDAGLegalize(*this).LegalizeDAG(); } - diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 935aab0..da75b8a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -73,6 +73,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; @@ -174,24 +185,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + if (NInVT.isVector()) + // Promote vector element via memory load/store. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); - case SplitVector: { + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; @@ -208,7 +225,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } - case WidenVector: + case TargetLowering::TypeWidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. 
return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); @@ -342,7 +359,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { SDValue Res = GetPromotedInteger(N->getOperand(0)); assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); @@ -507,11 +525,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { switch (getTypeAction(N->getOperand(0).getValueType())) { default: llvm_unreachable("Unknown type action!"); - case Legal: - case ExpandInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: Res = N->getOperand(0); break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: Res = GetPromotedInteger(N->getOperand(0)); break; } @@ -557,9 +575,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { DebugLoc DL = N->getDebugLoc(); EVT SmallVT = LHS.getValueType(); - // To determine if the result overflowed in a larger type, we extend the input - // to the larger type, do the multiply, then check the high bits of the result - // to see if the overflow happened. + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply, then check the high bits of + // the result to see if the overflow happened. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -569,8 +587,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { } SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); - // Overflow occurred iff the high part of the result does not zero/sign-extend - // the low part. + // Overflow occurred iff the high part of the result does not + // zero/sign-extend the low part. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred iff the high part is non-zero. @@ -672,6 +690,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: @@ -952,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1513,7 +1534,8 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. 
- assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2030,7 +2052,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2178,7 +2201,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2613,3 +2637,158 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { "Don't know how to expand this UINT_TO_FP!"); return MakeLibCall(LC, DstVT, &Op, 1, true, dl); } + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + SDValue BaseIdx = N->getOperand(1); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getIntPtrConstant(i)); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + // Insert the converted element to the new vector. 
+ Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { + + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + NewMask.push_back(SV->getMaskElt(i)); + } + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumElems = N->getNumOperands(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { + + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + assert(!InVT.isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { + + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT InElVT = InVT.getVectorElementType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + + SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0); + + SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, + NOutVTElem, N->getOperand(1)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT, + ConvertedVector, ConvElem, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = N->getOperand(1); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + V0->getValueType(0).getScalarType(), V0, V1); + + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); + +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { + + DebugLoc dl = N->getDebugLoc(); + + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); + + SmallVector<SDValue, 8> NewOps; + + 
// For each incoming vector + for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx!= E; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, + Incoming, DAG.getIntPtrConstant(i)); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), + &NewOps[0], NewOps.size()); + } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index cedda7e..ba658b0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -224,38 +224,38 @@ bool DAGTypeLegalizer::run() { switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results // with a legal type). Results can be remapped using ReplaceValueWith, // or their promoted/expanded/etc values registered in PromotedIntegers, // ExpandedIntegers etc. - case PromoteInteger: + case TargetLowering::TypePromoteInteger: PromoteIntegerResult(N, i); Changed = true; goto NodeDone; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: ExpandIntegerResult(N, i); Changed = true; goto NodeDone; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: SoftenFloatResult(N, i); Changed = true; goto NodeDone; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; goto NodeDone; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: ScalarizeVectorResult(N, i); Changed = true; goto NodeDone; - case SplitVector: + case TargetLowering::TypeSplitVector: SplitVectorResult(N, i); Changed = true; goto NodeDone; - case WidenVector: + case TargetLowering::TypeWidenVector: WidenVectorResult(N, i); Changed = true; goto NodeDone; @@ -277,36 +277,36 @@ ScanOperands: switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's // operands in place, and return "true". 
- case PromoteInteger: + case TargetLowering::TypePromoteInteger: NeedsReanalyzing = PromoteIntegerOperand(N, i); Changed = true; break; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: NeedsReanalyzing = ExpandIntegerOperand(N, i); Changed = true; break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: NeedsReanalyzing = SoftenFloatOperand(N, i); Changed = true; break; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: NeedsReanalyzing = ExpandFloatOperand(N, i); Changed = true; break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: NeedsReanalyzing = ScalarizeVectorOperand(N, i); Changed = true; break; - case SplitVector: + case TargetLowering::TypeSplitVector: NeedsReanalyzing = SplitVectorOperand(N, i); Changed = true; break; - case WidenVector: + case TargetLowering::TypeWidenVector: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 5409b88..06dc40f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -57,16 +57,6 @@ public: // 1+ - This is a node which has this many unprocessed operands. }; private: - enum LegalizeAction { - Legal, // The target natively supports this type. - PromoteInteger, // Replace this integer type with a larger one. - ExpandInteger, // Split this integer type into two of half the size. - SoftenFloat, // Convert this float type to a same size integer type. - ExpandFloat, // Split this float type into two of half the size. - ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // Split this vector type into two of half the size. - WidenVector // This vector type should be widened into a larger vector. - }; /// ValueTypeActions - This is a bitvector that contains two bits for each /// simple value type, where the two bits correspond to the LegalizeAction @@ -74,41 +64,13 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. - LegalizeAction getTypeAction(EVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { - default: - assert(false && "Unknown legalize action!"); - case TargetLowering::Legal: - return Legal; - case TargetLowering::Promote: - // Promote can mean - // 1) For integers, use a larger integer type (e.g. i8 -> i32). - // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). - if (!VT.isVector()) - return PromoteInteger; - return WidenVector; - case TargetLowering::Expand: - // Expand can mean - // 1) split scalar in half, 2) convert a float to an integer, - // 3) scalarize a single-element vector, 4) split a vector in two. - if (!VT.isVector()) { - if (VT.isInteger()) - return ExpandInteger; - if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) - return SoftenFloat; - return ExpandFloat; - } - - if (VT.getVectorNumElements() == 1) - return ScalarizeVector; - return SplitVector; - } + TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } /// IgnoreNodeResults - Pretend all of this node's results are legal. 
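With the private enum gone, every case label in this patch names the shared TargetLowering action directly. As a reading aid, here is a rough standalone restatement of the decision tree the deleted getTypeAction body encoded (the VT struct and classify* helpers are invented for illustration; the enum values mirror the TargetLowering names):

#include <cstdio>

enum LegalizeTypeAction {
  TypeLegal, TypePromoteInteger, TypeExpandInteger, TypeSoftenFloat,
  TypeExpandFloat, TypeScalarizeVector, TypeSplitVector, TypeWidenVector
};

struct VT { bool IsVector, IsInteger; unsigned Bits, NumElts; };

// "Promote" meant: grow scalars (i8 -> i32) but widen vectors (v3i32 -> v4i32).
LegalizeTypeAction classifyPromote(const VT &V) {
  return V.IsVector ? TypeWidenVector : TypePromoteInteger;
}

// "Expand" meant: split scalar integers in half, soften a float whose
// transformed type has the same size, split bigger floats, scalarize
// one-element vectors, and split every other vector in two.
LegalizeTypeAction classifyExpand(const VT &V, unsigned TransformedBits) {
  if (!V.IsVector) {
    if (V.IsInteger) return TypeExpandInteger;
    if (V.Bits == TransformedBits) return TypeSoftenFloat;
    return TypeExpandFloat;
  }
  return V.NumElts == 1 ? TypeScalarizeVector : TypeSplitVector;
}

int main() {
  VT I8 = {false, true, 8, 1};    // scalar i8 under Promote
  VT V3I32 = {true, true, 96, 3}; // v3i32 under Promote
  std::printf("%d %d\n", classifyPromote(I8), classifyPromote(V3I32));
  return 0;
}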
@@ -239,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -248,6 +210,11 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); @@ -289,6 +256,9 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a75ae87..85ea6b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -43,36 +43,36 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case SplitVector: + case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case WidenVector: { + case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0b4dd35..b5698f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -526,13 +526,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: - case SoftenFloat: - case ScalarizeVector: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: break; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // A scalar to vector conversion, where the scalar needs expansion. // If the vector is being split in two then we can just convert the // expanded pieces. @@ -545,7 +545,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, return; } break; - case SplitVector: + case TargetLowering::TypeSplitVector: // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); @@ -774,7 +774,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { default: llvm_unreachable("Unexpected type action!"); - case Legal: { + case TargetLowering::TypeLegal: { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), @@ -783,10 +783,21 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } - case SplitVector: + case TargetLowering::TypePromoteInteger: { + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), + InOp.getValueType().getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case TargetLowering::TypeSplitVector: GetSplitVector(N->getOperand(0), Lo, Hi); break; - case WidenVector: { + case TargetLowering::TypeWidenVector: { // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. 
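The new TargetLowering::TypePromoteInteger case above reduces "split a vector whose elements need promotion" to two subvector extracts on the promoted value. A scalar stand-in, assuming an i8-to-i32 promotion; splitPromoted and the container types are illustrative, not LLVM API, and where the DAG uses ANY_EXTEND the sketch's implicit sign-extension is just one legal choice of high bits:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Promote every lane first, then take two contiguous "subvector extracts"
// at element offsets 0 and LoN, mirroring the two EXTRACT_SUBVECTOR nodes
// built in the hunk above.
std::pair<std::vector<int32_t>, std::vector<int32_t> >
splitPromoted(const std::vector<int8_t> &In) {
  std::vector<int32_t> Promoted(In.begin(), In.end()); // per-lane extension
  const std::size_t LoN = Promoted.size() / 2;
  std::vector<int32_t> Lo(Promoted.begin(), Promoted.begin() + LoN);
  std::vector<int32_t> Hi(Promoted.begin() + LoN, Promoted.end());
  return std::make_pair(Lo, Hi);
}

int main() {
  std::vector<int8_t> In;
  for (int i = 1; i <= 4; ++i) In.push_back((int8_t)i); // a v4i8 stand-in
  std::pair<std::vector<int32_t>, std::vector<int32_t> > Halves =
      splitPromoted(In);
  std::printf("lo=%zu hi=%zu\n", Halves.first.size(), Halves.second.size());
  return 0;
}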
@@ -1439,7 +1450,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1515,7 +1526,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue ShOp = N->getOperand(1); EVT ShVT = ShOp.getValueType(); - if (getTypeAction(ShVT) == WidenVector) { + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } @@ -1557,9 +1568,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. InOp = GetPromotedInteger(InOp); @@ -1567,13 +1578,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (WidenVT.bitsEq(InVT)) return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; - case SoftenFloat: - case ExpandInteger: - case ExpandFloat: - case ScalarizeVector: - case SplitVector: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: break; - case WidenVector: + case TargetLowering::TypeWidenVector: // If the InOp is widened to the same size, convert it. Otherwise, fall // out of the switch and widen the widened input. InOp = GetWidenedVector(InOp); @@ -1653,7 +1664,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. - if (getTypeAction(InVT) != WidenVector) { + if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { // Add undef vectors to widen to correct length. 
unsigned NumConcat = WidenVT.getVectorNumElements() / @@ -1732,7 +1743,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(InOp); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1800,7 +1811,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); @@ -1882,7 +1893,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); - if (getTypeAction(CondVT) == WidenVector) + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) @@ -2026,7 +2037,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); @@ -2081,7 +2092,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, @@ -2153,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index b2e9c15..f09b381 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -71,6 +71,7 @@ static cl::opt<bool> DisableSchedCycles( cl::desc("Disable cycle-level precision during preRA scheduling")); // Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
static cl::opt<bool> DisableSchedRegPressure( "disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp")); @@ -80,6 +81,9 @@ static cl::opt<bool> DisableSchedLiveUses( static cl::opt<bool> DisableSchedVRegCycle( "disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); static cl::opt<bool> DisableSchedStalls( "disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp")); @@ -102,11 +106,11 @@ static cl::opt<unsigned> AvgIPC( #ifndef NDEBUG namespace { // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic, - FactOther, NumFactors }; + enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; } static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"}; +{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"}; static int FactorCount[NumFactors]; #endif //!NDEBUG @@ -272,6 +276,33 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::untyped) { + unsigned Opcode = RegDefPos.GetNode()->getMachineOpcode(); + unsigned Idx = RegDefPos.GetIdx(); + const TargetInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -463,6 +494,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency through such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -533,6 +571,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors.
If a predecessor pending count is zero, add it to /// the Available queue. @@ -542,7 +582,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with @@ -559,7 +600,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then - // advance CurCycle before ReleasePredecessors to avoid useles pushed to + // advance CurCycle before ReleasePredecessors to avoid useless pushes to // PendingQueue for schedulers that implement HasReadyFilter. if (!HazardRec->isEnabled() && AvgIPC < 2) AdvanceToCycle(CurCycle + 1); @@ -580,20 +621,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, the cycle was advanced earlier. + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. // // Check AvailableQueue after ReleasePredecessors in case of zero latency. - ++IssueCount; - if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) - || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC) - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -989,14 +1035,15 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. - if (!LiveRegDefs[Reg]) continue; + if (!LiveRegDefs[*AliasI]) continue; // Allow multiple uses of the same def. - if (LiveRegDefs[Reg] == SU) continue; + if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(Reg)) - LRegs.push_back(Reg); + if (RegAdded.insert(*AliasI)) { + LRegs.push_back(*AliasI); + } } } @@ -1220,7 +1267,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1349,6 +1396,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. 
+ return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1549,20 +1611,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1583,7 +1658,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1593,7 +1668,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1623,6 +1698,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1661,17 +1750,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. 
- CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1710,7 +1788,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1746,8 +1834,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; } @@ -1858,9 +1948,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1873,16 +1964,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -2008,7 +2099,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. 
static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -2030,62 +2143,71 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet<const SUnit*, 4> Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) - return true; + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } } - return false; } -// Return true if the virtual register defined by VRCycleSU may interfere with -// VRUseSU. -// -// Note: We may consider two SU's that use the same value live into a loop as -// interferng even though the value is not an induction variable. This is an -// unfortunate consequence of scheduling on the selection DAG. -static bool checkVRegCycleInterference(const SUnit *VRCycleSU, - const SUnit *VRUseSU) { - for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(), - E = VRCycleSU->Preds.end(); I != E; ++I) { +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". 
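// (Editorial note, not part of this patch: initVRegCycle, resetVRegCycle, and
// this predicate together replace the removed UnitsSharePred and
// BUCompareVRegCycle machinery. The flag is now computed once, only for
// single-block loops, charged as a one-cycle latency penalty in
// BUCompareLatency, and cleared from the CopyFromReg operands as soon as the
// defining node has been scheduled.)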
+ if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - SDNode *InNode = I->getSUnit()->getNode(); - if (!InNode || InNode->getOpcode() != ISD::CopyFromReg) - continue; - for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(), - EE = VRUseSU->Preds.end(); II != EE; ++II) { - if (II->getSUnit() == I->getSUnit()) - return true; + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + return true; } } return false; } -// Compare the VRegCycle properties of the nodes. -// Return -1 if left has higher priority, 1 if right has higher priority. -// Return 0 if priority is equivalent. -static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) { - if (left->isVRegCycle && !right->isVRegCycle) { - if (checkVRegCycleInterference(left, right)) - return -1; - } - else if (!left->isVRegCycle && right->isVRegCycle) { - if (checkVRegCycleInterference(right, left)) - return 1; - } - return 0; -} - // Check for either a dependence (latency) or resource (hazard) stall. // // Note: The ScheduleHazardRecognizer interface requires a non-const SU. @@ -2101,23 +2223,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -2128,48 +2239,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. 
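// (Editorial note, not part of this patch: SUnit::getHeight() is the longest
// path from the node to the end of the DAG and SUnit::getDepth() the longest
// path from the entry, so the one-cycle VRegCycle penalty above is folded
// into both metrics before the comparisons below consult them.)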
if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ullman number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); + + // Be really careful about hoisting call operands above previous calls. + // Only allow it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + if (LPriority != RPriority) { DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; } - DEBUG(++FactorCount[FactOther]); + + // If one or both of the nodes are calls and their Sethi-Ullman numbers are + // the same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try to schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2190,40 +2355,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. 
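// (Editorial note, not part of this patch: closestSucc() summarizes how near
// a node's closest user is, so comparing LDist and RDist below is the step
// that actually pulls a def toward its use and keeps the resulting live
// interval short.)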
unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } - if (!DisableSchedCycles) { + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); + + // Do not compare latencies when one or both of the nodes are calls. + if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2255,6 +2442,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2264,24 +2454,22 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - int result = 0; - if (!DisableSchedVRegCycle) { - result = BUCompareVRegCycle(left, right); - } - if (result == 0 && !LHigh && !RHigh) { - result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - if (result != 0) - return result > 0; return BURRSort(left, right, SPQ); } @@ -2322,6 +2510,9 @@ static bool canEnableCoalescing(SUnit *SU) { // list-ilp is currently an experimental scheduler that allows various // heuristics to be enabled prior to the normal register reduction logic. 
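// (Editorial aside, not part of this patch.) After this change every priority
// function has the same shape: checkSpecialNodes() runs first so pseudo-ops
// such as TokenFactor bypass the heuristics, any scheduler-specific heuristic
// runs next, and BURRSort() is the shared fallback. A minimal self-contained
// sketch of that layering; ToySUnit, toyCheckSpecial, and toyFallback are
// invented names, not LLVM API:
struct ToySUnit {
  bool IsScheduleLow; // stands in for SUnit::isScheduleLow
  unsigned QueueId;   // stands in for NodeQueueId, the FIFO tie-breaker
};
// Mirrors checkSpecialNodes(): -1 schedules right above left, 1 schedules
// left above right, 0 means no bias.
static int toyCheckSpecial(const ToySUnit &L, const ToySUnit &R) {
  if (L.IsScheduleLow != R.IsScheduleLow)
    return L.IsScheduleLow < R.IsScheduleLow ? 1 : -1;
  return 0;
}
// Stand-in for the BURRSort() fallback chain.
static bool toyFallback(const ToySUnit &L, const ToySUnit &R) {
  return L.QueueId > R.QueueId;
}
// The comparator proper: bias special nodes first, then fall back.
static bool toyPriority(const ToySUnit &L, const ToySUnit &R) {
  if (int Res = toyCheckSpecial(L, R))
    return Res > 0;
  return toyFallback(L, R);
}
// (End of editorial aside.)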
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2347,12 +2538,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (RReduce && !LReduce) return true; } - if (!DisableSchedVRegCycle) { - int result = BUCompareVRegCycle(left, right); - if (result != 0) - return result > 0; - } - if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); @@ -2391,6 +2576,24 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2668,6 +2871,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 24a1937..0d656ef 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -83,10 +83,13 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -283,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -335,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. 
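// (Editorial note, not part of this patch: isScheduleLow is exactly the flag
// that the new checkSpecialNodes() in ScheduleDAGRRList.cpp consults, so
// marking the TokenFactor here pushes it below other ready nodes in every
// priority queue; the matching zero-latency treatment of its chain edges
// follows in AddSchedEdges() and ComputeLatency() below.)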
+ if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -342,16 +355,26 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); - // Set isVRegCycle if the node operands are live into and value is live out - // of a single block loop. - InitVRegCycleFlag(NodeSUnit); - // Compute NumRegDefsLeft. This must be done before AddSchedEdges. InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -412,11 +435,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -512,47 +539,6 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { } } -// Set isVRegCycle if this node's single use is CopyToReg and its only active -// data operands are CopyFromReg. -// -// This is only relevant for single-block loops, in which case the VRegCycle -// node is likely an induction variable in which the operand and target virtual -// registers should be coalesced (e.g. pre/post increment values). Setting the -// isVRegCycle flag helps the scheduler prioritize other uses of the same -// CopyFromReg so that this node becomes the virtual register "kill". This -// avoids interference between the values live in and out of the block and -// eliminates a copy inside the loop. 
-void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) { - if (!BB->isSuccessor(BB)) - return; - - SDNode *N = SU->getNode(); - if (N->getGluedNode()) - return; - - if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg) - return; - - bool FoundLiveIn = false; - for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) { - EVT OpVT = OI->getValueType(); - assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); - - if (OpVT == MVT::Other) - continue; // ignore chain operands - - if (isPassiveNode(OI->getNode())) - continue; // ignore constants and such - - if (OI->getNode()->getOpcode() != ISD::CopyFromReg) - return; - - FoundLiveIn = true; - } - if (FoundLiveIn) - SU->isVRegCycle = true; -} - void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { assert(SU->NumRegDefsLeft == 0 && "expect a new node"); for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { @@ -562,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -569,7 +565,6 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SDNode *N = SU->getNode(); if (N && N->isMachineOpcode() && TII->isHighLatencyDef(N->getMachineOpcode())) SU->Latency = HighLatencyCycles; @@ -641,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3..3ad2bd6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2711c8..68eeb60 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2050,14 +2050,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; default: - // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (Op.getOpcode() < ISD::BUILTIN_OP_END) + break; + // Fallthrough case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); - } + // Allow the target to implement this method for its nodes. + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); return; } } @@ -2322,6 +2323,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const { return !C->isZero(); // TODO: Recognize more cases here. 
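// (Editorial note, not part of this patch: the OR case added below is sound
// because every bit set in the constant operand is necessarily set in the
// result, so (x | C) can only be zero when C itself is zero -- hence the
// !C->isNullValue() test.)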
+ switch (Op.getOpcode()) { + default: break; + case ISD::OR: + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return !C->isNullValue(); + break; + } return false; } @@ -2339,16 +2347,6 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } -bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { - GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - if (!GA) return false; - if (GA->getOffset() != 0) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); - if (!GV) return false; - return MF->getMMI().hasDebugInfo(); -} - - /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { @@ -6304,7 +6302,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, MVT::i32)); + getConstant(i, TLI.getPointerTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 24c325e..d79a5ae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -280,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Vector/Vector bitcast. - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); + } - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -426,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. 
Shuffle in @@ -446,13 +469,33 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); - } else { + } else if (PartVT.isVector() && + PartVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + unsigned NumElts = ValueVT.getVectorNumElements(); + SmallVector<SDValue, 8> NewOps; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ValueVT.getScalarType(), Val, DAG.getIntPtrConstant(i)); + SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, + DL, PartVT.getScalarType(), Ext); + NewOps.push_back(Cast); + } + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, + &NewOps[0], NewOps.size()); + } else { // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -783,11 +826,20 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. This function is separated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -1175,6 +1227,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); @@ -1223,6 +1279,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1492,8 +1566,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1637,8 +1711,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1683,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1924,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -1952,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2019,9 +2094,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. 
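// (Editorial note, not part of this patch: on x87 hosts these divisions may
// be evaluated in 80-bit registers, and whether an intermediate gets spilled
// to a 64-bit double can change its value, so the same switch could lower
// differently from host to host. The volatile qualifier below forces each
// density through memory, making the Metric comparison deterministic.)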
+ volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -2367,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2806,16 +2887,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SmallVector<SDValue, 4> Values(NumAggValues); SDValue Agg = getValue(Op0); - SDValue Val = getValue(Op1); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). - for (; i != LinearIndex + NumValValues; ++i) - Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : @@ -2838,6 +2921,13 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); + + // Ignore an extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SmallVector<SDValue, 4> Values(NumValValues); SDValue Agg = getValue(Op0); @@ -4009,6 +4099,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +// getTruncatedArgReg - Find underlying register used for a truncated +// argument. +static unsigned getTruncatedArgReg(const SDValue &N) { + if (N.getOpcode() != ISD::TRUNCATE) + return 0; + + const SDValue &Ext = N.getOperand(0); + if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext) { + const SDValue &CFR = Ext.getOperand(0); + if (CFR.getOpcode() == ISD::CopyFromReg) + return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); + else + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); + } + return 0; +} + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. 
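// (Editorial aside, not part of this patch.) getTruncatedArgReg() above peels
// the node shape an argument takes when the ABI widens it -- a TRUNCATE of an
// AssertZext/AssertSext wrapped around a CopyFromReg (e.g. an i8 argument
// promoted to i32 and truncated back). A standalone sketch of the same
// peeling over a toy node type; every name here is invented for illustration:
enum ToyOpcode { TOY_TRUNCATE, TOY_ASSERT_ZEXT, TOY_ASSERT_SEXT,
                 TOY_COPY_FROM_REG, TOY_OTHER };
struct ToyNode {
  ToyOpcode Opcode;
  const ToyNode *Operand; // the single operand this sketch cares about
  unsigned Reg;           // meaningful only for TOY_COPY_FROM_REG
};
// Return the underlying register, or 0 if the pattern does not match.
static unsigned toyTruncatedArgReg(const ToyNode *N) {
  if (!N || N->Opcode != TOY_TRUNCATE)
    return 0;
  const ToyNode *Ext = N->Operand;
  if (!Ext ||
      (Ext->Opcode != TOY_ASSERT_ZEXT && Ext->Opcode != TOY_ASSERT_SEXT))
    return 0;
  const ToyNode *Inner = Ext->Operand;
  if (!Inner)
    return 0;
  if (Inner->Opcode == TOY_COPY_FROM_REG)
    return Inner->Reg;
  if (Inner->Opcode == TOY_TRUNCATE) // nested truncation, as in the patch
    return toyTruncatedArgReg(Inner);
  return 0;
}
// (End of editorial aside.)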
@@ -4029,10 +4137,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. @@ -4044,9 +4148,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, Reg = 0; } - if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { - Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (N.getNode()) { + if (N.getOpcode() == ISD::CopyFromReg) + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + else + Reg = getTruncatedArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -4208,9 +4315,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); else { - // Can't do anything with other non-AI cases yet. This might be a - // parameter of a callee function that got inlined, for example. - DEBUG(dbgs() << "Dropping debug info for " << DI); + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, 0, N); return 0; } } else if (AI) @@ -4403,7 +4510,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot())); + getRoot(), getValue(I.getArgOperand(0)))); return 0; } @@ -4672,9 +4779,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4689,15 +4809,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -4784,7 +4905,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Outs, TLI, &Offsets); bool 
CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), Outs, FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -4814,8 +4937,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - SDValue ArgNode = getValue(*i); - Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); @@ -5255,6 +5384,7 @@ public: const llvm::Type *OpTy = CallOperandVal->getType(); + // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { @@ -5264,6 +5394,11 @@ public: OpTy = PtrTy->getElementType(); } + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -5315,6 +5450,8 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; + if (!RC->isAllocatable()) + continue; // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -5336,15 +5473,14 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, // frame pointer in functions that need it (due to them not being taken // out of allocation, because a variable sized allocation hasn't been seen // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(MF); + if (std::find(RawOrder.begin(), RawOrder.end(), Reg) != RawOrder.end()) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } } return FoundRC; } @@ -5491,9 +5627,15 @@ static void GetRegistersForValue(SelectionDAG &DAG, OpInfo.ConstraintVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + BitVector Reserved = TRI->getReservedRegs(MF); unsigned NumAllocated = 0; for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { unsigned Reg = RegClassRegs[i]; + // Filter out the reserved registers, but note that reserved registers are + // not fully determined at this point. We may still decide we need a frame + // pointer. + if (Reserved.test(Reg)) + continue; // See if this register is available. if ((isOutReg && OutputRegs.count(Reg)) || // Already used. (isInReg && InputRegs.count(Reg))) { // Already used. 
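// (Editorial aside, not part of this patch.) The Reserved.test(Reg) check
// added above is a straightforward "skip candidates flagged in a bitvector"
// scan; the subtlety, as the patch's own comment notes, is only that the
// reserved set is not final yet because a frame pointer may still be forced
// later. A self-contained sketch of the shape, with invented names:
#include <cstddef>
#include <vector>
// Return the first candidate register not marked reserved, or 0 if none.
static unsigned firstUsableReg(const std::vector<unsigned> &Candidates,
                               const std::vector<bool> &Reserved) {
  for (std::size_t i = 0, e = Candidates.size(); i != e; ++i) {
    unsigned Reg = Candidates[i];
    // Reserved registers (e.g. the stack pointer) must never be handed out.
    if (Reg < Reserved.size() && Reserved[Reg])
      continue;
    return Reg;
  }
  return 0;
}
// (End of editorial aside.)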
@@ -5542,7 +5684,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -5601,7 +5745,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { hasMemory = true; else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5651,12 +5796,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. @@ -5858,7 +6008,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + @@ -6067,14 +6217,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setByVal(); const PointerType *Ty = cast<PointerType>(Args[i].Ty); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (Args[i].isNest) Flags.setNest(); @@ -6180,7 +6331,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. 
if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6219,6 +6370,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (EnableFastISel) + return A->use_empty(); + + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + } + return true; +} + void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); @@ -6273,14 +6443,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { Flags.setByVal(); const PointerType *Ty = cast<PointerType>(I->getType()); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); - unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (F.paramHasAttr(Idx, Attribute::Nest)) Flags.setNest(); @@ -6362,8 +6533,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (I->use_empty() && NumValues) SDB->setUnusedArgValue(I, InVals[i]); - for (unsigned Value = 0; Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); @@ -6382,21 +6553,35 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; + // Note down frame index for byval arguments. - if (I->hasByValAttr() && !ArgValues.empty()) + if (I->hasByValAttr()) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); - if (!I->use_empty()) { - SDValue Res; - if (!ArgValues.empty()) - Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); - SDB->setValue(I, Res); - - // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); + SDB->setValue(I, Res); + + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. 
+ // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(I)) { + FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } } @@ -6442,6 +6627,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Ignore dead phi's. if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f546612..a1ca891 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> -#include <set> namespace llvm { @@ -333,6 +332,14 @@ public: /// consumed. void clear(); + /// clearDanglingDebugInfo - Clear the dangling debug information + /// map. This function is seperated from the clear so that debug + /// information that is dangling in a basic block can be properly + /// resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached + /// to PHI nodes. + void clearDanglingDebugInfo(); + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -427,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4b6066e..dc8044b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -55,17 +56,11 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); -#ifndef NDEBUG -STATISTIC(NumBBWithOutOfOrderLineInfo, - "Number of blocks with out of order line number info"); -STATISTIC(NumMBBWithOutOfOrderLineInfo, - "Number of machine blocks with out of order line number info"); -#endif - static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -74,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when 
\"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -192,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -205,43 +206,11 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } -/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or -/// other function that gcc recognizes as "returning twice". This is used to -/// limit code-gen optimizations on the machine function. -/// -/// FIXME: Remove after <rdar://problem/8031714> is fixed. -static bool FunctionCallsSetJmp(const Function *F) { - const Module *M = F->getParent(); - static const char *ReturnsTwiceFns[] = { - "_setjmp", - "setjmp", - "sigsetjmp", - "setjmp_syscall", - "savectx", - "qsetjmp", - "vfork", - "getcontext" - }; -#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *) - - for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I) - if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { - if (!Callee->use_empty()) - for (Value::const_use_iterator - I = Callee->use_begin(), E = Callee->use_end(); - I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(*I)) - if (CI->getParent()->getParent() == F) - return true; - } - - return false; -#undef NUM_RETURNS_TWICE_FNS -} - /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't @@ -302,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -392,7 +367,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -421,10 +396,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +412,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. 
CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -572,7 +545,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); - CurDAG->Legalize(OptLevel); + CurDAG->Legalize(); } DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber @@ -748,16 +721,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -788,48 +794,17 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); } -#ifndef NDEBUG -/// CheckLineNumbers - Check if basic block instructions follow source order -/// or not. -static void CheckLineNumbers(const BasicBlock *BB) { - unsigned Line = 0; - unsigned Col = 0; - for (BasicBlock::const_iterator BI = BB->begin(), - BE = BB->end(); BI != BE; ++BI) { - const DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. 
+ !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } -/// CheckLineNumbers - Check if machine basic block instructions follow source -/// order or not. -static void CheckLineNumbers(const MachineBasicBlock *MBB) { - unsigned Line = 0; - unsigned Col = 0; - for (MachineBasicBlock::const_iterator MBI = MBB->begin(), - MBE = MBB->end(); MBI != MBE; ++MBI) { - const DebugLoc DL = MBI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumMBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } -} -#endif - void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -841,9 +816,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { for (ReversePostOrderTraversal<const Function*>::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { const BasicBlock *LLVMBB = *I; -#ifndef NDEBUG - CheckLineNumbers(LLVMBB); -#endif if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; @@ -856,15 +828,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -912,10 +882,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa<TerminatorInst>(Inst) && - !isa<DbgInfoIntrinsic>(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -924,16 +891,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. 
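// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical types, not LLVM API): the backward scan
// that follows can be modeled on a plain linked list standing in for a basic
// block. isFoldedOrDead mirrors isFoldedOrDeadInstruction above, and
// findFoldableLoad mirrors the BeforeInst walk below.
struct Instr {
  Instr *Prev;            // previous instruction in the block
  bool MayWriteToMemory;  // side-effecting instructions are never folded
  bool IsTerminator;
  bool IsDebug;
  bool IsExported;        // value is needed outside this block
  bool IsLoad;
  unsigned NumUses;
};

static bool isFoldedOrDead(const Instr *I) {
  return !I->MayWriteToMemory && !I->IsTerminator && !I->IsDebug &&
         !I->IsExported;
}

// Walk backwards from the just-selected instruction, skipping instructions
// that were folded or are dead, and report a single-use load candidate.
static const Instr *findFoldableLoad(const Instr *Selected,
                                     const Instr *Begin) {
  const Instr *Before = Selected;
  while (Before != Begin) {
    Before = Before->Prev;
    if (!isFoldedOrDead(Before))
      break;
  }
  if (Before != Selected && Before->IsLoad && Before->NumUses == 1)
    return Before;
  return 0;
}
// ---------------------------------------------------------------------------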
+ const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -963,9 +935,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - // Otherwise, give up on FastISel for the rest of the block. - // For now, be a little lenient about non-branch terminators. - if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { + if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { + // Don't abort, and use a different message for terminator misses. + ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed terminator: "; + Inst->dump(); + } + } else { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -987,22 +964,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; -#ifndef NDEBUG - for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); - MBI != MBE; ++MBI) - CheckLineNumbers(MBI); -#endif + SDB->clearDanglingDebugInfo(); } void @@ -2634,11 +2609,45 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // instructions that access memory and for ComplexPatterns that match // loads. if (EmitNodeInfo & OPFL_MemRefs) { + // Only attach load or store memory operands if the generated + // instruction may load or store. 
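// ---------------------------------------------------------------------------
// Standalone model of the filtering below (MemOp and filterMemOps are
// invented names): keep a load operand only if the instruction may load, a
// store operand only if it may store, and anything else unconditionally.
// The real code makes two passes because it allocates a fixed-size array up
// front; with a growable vector a single pass suffices.
#include <vector>

struct MemOp { bool IsLoad, IsStore; };

static std::vector<MemOp> filterMemOps(const std::vector<MemOp> &Matched,
                                       bool MayLoad, bool MayStore) {
  std::vector<MemOp> Kept;
  for (unsigned i = 0, e = Matched.size(); i != e; ++i) {
    const MemOp &M = Matched[i];
    if (M.IsLoad) {
      if (MayLoad) Kept.push_back(M);   // loads only if the instr may load
    } else if (M.IsStore) {
      if (MayStore) Kept.push_back(M);  // stores only if the instr may store
    } else {
      Kept.push_back(M);                // neither load nor store: always kept
    }
  }
  return Kept;
}
// ---------------------------------------------------------------------------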
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = TID.mayLoad(); + bool mayStore = TID.mayStore(); + + unsigned NumMemRefs = 0; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + ++NumMemRefs; + } else if ((*I)->isStore()) { + if (mayStore) + ++NumMemRefs; + } else { + ++NumMemRefs; + } + } + MachineSDNode::mmo_iterator MemRefs = - MF->allocateMemRefsArray(MatchedMemRefs.size()); - std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs); + MF->allocateMemRefsArray(NumMemRefs); + + MachineSDNode::mmo_iterator MemRefsPos = MemRefs; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + *MemRefsPos++ = *I; + } else if ((*I)->isStore()) { + if (mayStore) + *MemRefsPos++ = *I; + } else { + *MemRefsPos++ = *I; + } + } + cast<MachineSDNode>(Res) - ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); + ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } DEBUG(errs() << " " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4b0822d..efbfaa4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,11 +26,19 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cctype> using namespace llvm; +/// We are in the process of implementing a new TypeLegalization action +/// - the promotion of vector elements. This feature is disabled by default +/// and only enabled using this flag. +static cl::opt<bool> +AllowPromoteIntElem("promote-elements", cl::Hidden, + cl::desc("Allow promotion of integer vector element types")); + namespace llvm { TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { bool isLocal = GV->hasLocalLinkage(); @@ -528,7 +536,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof), + mayPromoteElements(AllowPromoteIntElem) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -596,6 +605,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; @@ -662,10 +673,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -747,7 +764,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(ExpandedVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger); } // Inspect all of the ValueType's smaller than the largest integer @@ -761,7 +778,7 @@ void TargetLowering::computeRegisterProperties() { } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = (MVT::SimpleValueType)LegalIntReg; - ValueTypeActions.setTypeAction(IVT, Promote); + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -770,7 +787,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); } // Decide how to handle f64. If the target does not have native f64 support, @@ -779,7 +796,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, Expand); + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } // Decide how to handle f32. If the target does not have native support for @@ -789,12 +806,12 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, Promote); + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); } else { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, Expand); + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } } @@ -810,6 +827,30 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); if (NElts != 1) { bool IsLegalWiderType = false; + // If we allow the promotion of vector elements using a flag, + // then return TypePromoteInteger on vector elements. + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. + if (mayPromoteElements) + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. 
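// ---------------------------------------------------------------------------
// A minimal sketch of the search below, with a value type reduced to
// (element bits, element count) and legality reduced to a set lookup; none
// of these names are LLVM's. E.g. with <4 x i32> legal, <4 x i8> is promoted
// to <4 x i32> rather than widened to a longer vector.
#include <set>
#include <utility>

typedef std::pair<unsigned, unsigned> SimpleVT; // (element bits, #elements)

static bool findPromotedVT(SimpleVT VT, const std::set<SimpleVT> &Legal,
                           SimpleVT &Promoted) {
  // Same element count, wider integer elements.
  for (unsigned Bits = VT.first * 2; Bits <= 64; Bits *= 2) {
    SimpleVT Cand(Bits, VT.second);
    if (Legal.count(Cand)) {
      Promoted = Cand;
      return true;
    }
  }
  return false; // caller falls back to widening the vector
}
// ---------------------------------------------------------------------------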
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && @@ -818,7 +859,7 @@ void TargetLowering::computeRegisterProperties() { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); IsLegalWiderType = true; break; } @@ -838,10 +879,12 @@ void TargetLowering::computeRegisterProperties() { if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); + unsigned NumElts = VT.getVectorNumElements(); + ValueTypeActions.setTypeAction(VT, + NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); } else { TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); } } @@ -890,7 +933,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. - if (NumElts != 1 && getTypeAction(VT) == Promote) { + if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) { RegisterVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterVT)) { IntermediateVT = RegisterVT; @@ -928,8 +971,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1678,6 +1727,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1691,7 +1747,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1716,26 +1772,28 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::BITCAST: -#if 0 - // If this is an FP->Int bitcast and if the sign bit is the only thing that - // is demanded, turn this into a FGETSIGN. 
- if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && - MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && - !MVT::isVector(Op.getOperand(0).getValueType())) { - // Only do this xform if FGETSIGN is valid or if before legalize. - if (!TLO.AfterLegalize || - isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), - Op.getOperand(0)); + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), Sign, ShAmt)); } } -#endif break; case ISD::ADD: case ISD::MUL: @@ -1842,7 +1900,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1853,12 +1910,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1911,6 +1967,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
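// ---------------------------------------------------------------------------
// Worked example for the (zext x) == C fold added just below (the helper
// name is invented): the compare can be narrowed to the pre-extension width
// whenever all of the constant's active bits fit below that width.
#include <stdint.h>

static bool canNarrowZExtCompare(unsigned MinBits, uint64_t C,
                                 unsigned CBitWidth) {
  unsigned ActiveBits = 0;              // bits needed to represent C
  for (uint64_t T = C; T; T >>= 1)
    ++ActiveBits;
  // Mirrors: MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits().
  return MinBits < CBitWidth && MinBits > ActiveBits;
}

// canNarrowZExtCompare(8, 42, 32): true -- (zext i8 %x to i32) == 42
//   becomes %x == 42 at i8 width.
// canNarrowZExtCompare(8, 300, 32): false -- 300 needs 9 bits, the equality
//   can never hold after a zext from i8, so this fold does not apply.
// ---------------------------------------------------------------------------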
 }
+  // (zext x) == C --> x == (trunc C)
+  if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+    unsigned MinBits = N0.getValueSizeInBits();
+    SDValue PreZExt;
+    if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+      // ZExt
+      MinBits = N0->getOperand(0).getValueSizeInBits();
+      PreZExt = N0->getOperand(0);
+    } else if (N0->getOpcode() == ISD::AND) {
+      // DAGCombine turns costly ZExts into ANDs
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+        if ((C->getAPIntValue()+1).isPowerOf2()) {
+          MinBits = C->getAPIntValue().countTrailingOnes();
+          PreZExt = N0->getOperand(0);
+        }
+    } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+      // ZEXTLOAD
+      if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+        MinBits = LN0->getMemoryVT().getSizeInBits();
+        PreZExt = N0;
+      }
+    }
+
+    // Make sure we're not losing bits from the constant.
+    if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+      EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+      if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+        // Will get folded away.
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+        SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+        return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+      }
+    }
+  }
+
   // If the LHS is '(and load, const)', the RHS is 0,
   // the test is for equality or unsigned, and all 1 bits of the const are
   // in the same partial word, see if we can shorten the load.
@@ -1949,7 +2041,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
     if (bestWidth) {
-      EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+      EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
       if (newVT.isRound()) {
         EVT PtrType = Lod->getOperand(1).getValueType();
         SDValue Ptr = Lod->getBasePtr();
@@ -2578,9 +2670,13 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
 /// vector.  If it is invalid, don't add anything to Ops.
 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
-                                                  char ConstraintLetter,
+                                                  std::string &Constraint,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
+
+  if (Constraint.length() > 1) return;
+
+  char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
   case 'X':     // Allows any operand; labels (basic block) use this.
@@ -2769,6 +2865,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
         report_fatal_error("Indirect operand for inline asm not a pointer!");
       OpTy = PtrTy->getElementType();
     }
+
+    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+    if (const StructType *STy = dyn_cast<StructType>(OpTy))
+      if (STy->getNumElements() == 1)
+        OpTy = STy->getElementType(0);
+
     // If OpTy is not a single value, it may be a struct/union that we
     // can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -3013,7 +3115,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 7b5bca4..160f38f 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. bool changed = true; unsigned iterations = 0; while (changed) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index c621726..221bec5 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -47,7 +47,6 @@ STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); STATISTIC(numAborts , "Number of times interval joining aborted"); -STATISTIC(numDeadValNo, "Number of valno def marked dead"); char SimpleRegisterCoalescing::ID = 0; static cl::opt<bool> @@ -61,9 +60,9 @@ DisableCrossClassJoin("disable-cross-class-join", cl::init(false), cl::Hidden); static cl::opt<bool> -DisablePhysicalJoin("disable-physical-join", - cl::desc("Avoid coalescing physical register copies"), - cl::init(false), cl::Hidden); +EnablePhysicalJoin("join-physregs", + cl::desc("Join physical register copies"), + cl::init(false), cl::Hidden); static cl::opt<bool> VerifyCoalescing("verify-coalescing", @@ -208,15 +207,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, if (ValLR+1 != BLR) return false; // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && - *tri_->getSubRegisters(IntB.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); }); return false; } @@ -254,7 +252,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { + // If B1 is killed by a PHI, then the merged live range must also be killed + // by the same PHI, as B0 and B1 can not overlap. 
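// ---------------------------------------------------------------------------
// Tiny model of the flag preservation below (ValNo and mergeInto are
// invented): a PHI-kill on the value being merged away must be re-asserted
// on the surviving value, because the merge itself discards it.
struct ValNo { bool HasPHIKill; };

static void mergeInto(ValNo &From, ValNo &Into) {
  bool HadPHIKill = From.HasPHIKill; // save before From is discarded
  // ... live ranges of From are folded into Into here ...
  if (HadPHIKill)
    Into.HasPHIKill = true;
}
// ---------------------------------------------------------------------------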
+ bool HasPHIKill = BValNo->hasPHIKill(); IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (HasPHIKill) + ValLR->valno->setHasPHIKill(true); } DEBUG({ dbgs() << " result = "; @@ -273,7 +276,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // merge, find the last use and trim the live range. That will also add the // isKill marker. if (ALR->end == CopyIdx) - TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + li_->shrinkToUses(&IntA); ++numExtends; return true; @@ -427,6 +430,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *NewMI = tii_->commuteInstruction(DefMI); if (!NewMI) return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + return false; if (NewMI != DefMI) { li_->ReplaceMachineInstrInMaps(DefMI, NewMI); MBB->insert(DefMI, NewMI); @@ -504,98 +511,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return true; } -/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply -/// fallthoughs to SuccMBB. -static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, - MachineBasicBlock *SuccMBB, - const TargetInstrInfo *tii_) { - if (MBB == SuccMBB) - return true; - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB && - MBB->isSuccessor(SuccMBB); -} - -/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range -/// from a physical register live interval as well as from the live intervals -/// of its sub-registers. -static void removeRange(LiveInterval &li, - SlotIndex Start, SlotIndex End, - LiveIntervals *li_, const TargetRegisterInfo *tri_) { - li.removeRange(Start, End, true); - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &sli = li_->getInterval(*SR); - SlotIndex RemoveStart = Start; - SlotIndex RemoveEnd = Start; - - while (RemoveEnd != End) { - LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); - if (LR == sli.end()) - break; - RemoveEnd = (LR->end < End) ? LR->end : End; - sli.removeRange(RemoveStart, RemoveEnd, true); - RemoveStart = RemoveEnd; - } - } - } -} - -/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block -/// as the copy instruction, trim the live interval to the last use and return -/// true. -bool -SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, - MachineBasicBlock *CopyMBB, - LiveInterval &li, - const LiveRange *LR) { - SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB); - SlotIndex LastUseIdx; - MachineOperand *LastUse = - lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx); - if (LastUse) { - MachineInstr *LastUseMI = LastUse->getParent(); - if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { - // r1024 = op - // ... - // BB1: - // = r1024 - // - // BB2: - // r1025<dead> = r1024<kill> - if (MBBStart < LR->end) - removeRange(li, MBBStart, LR->end, li_, tri_); - return true; - } - - // There are uses before the copy, just shorten the live range to the end - // of last use. 
- LastUse->setIsKill(); - removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); - if (LastUseMI->isCopy()) { - MachineOperand &DefMO = LastUseMI->getOperand(0); - if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) - DefMO.setIsDead(); - } - return true; - } - - // Is it livein? - if (LR->start <= MBBStart && LR->end > MBBStart) { - if (LR->start == li_->getZeroIndex()) { - assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); - // Live-in to the function but dead. Remove it from entry live-in set. - mf_->begin()->removeLiveIn(li.reg); - } - // FIXME: Shorten intervals in BBs that reaches this BB. - } - - return false; -} - /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, @@ -782,26 +697,6 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, return false; } -/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. -/// Return true if live interval is removed. -bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); - LiveInterval::iterator MLR = - li.FindLiveRangeContaining(CopyIdx.getDefIndex()); - if (MLR == li.end()) - return false; // Already removed by ShortenDeadCopySrcLiveRange. - SlotIndex RemoveStart = MLR->start; - SlotIndex RemoveEnd = MLR->end; - SlotIndex DefIdx = CopyIdx.getDefIndex(); - // Remove the liverange that's defined by this. - if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) { - removeRange(li, RemoveStart, RemoveEnd, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); - } - return false; -} - /// RemoveDeadDef - If a def of a live interval is now determined dead, remove /// the val# it defines. If the live interval becomes empty, remove it as well. bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, @@ -835,84 +730,6 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, } } -/// PropagateDeadness - Propagate the dead marker to the instruction which -/// defines the val#. -static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, - SlotIndex &LRStart, LiveIntervals *li_, - const TargetRegisterInfo* tri_) { - MachineInstr *DefMI = - li_->getInstructionFromIndex(LRStart.getDefIndex()); - if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg); - if (DeadIdx != -1) - DefMI->getOperand(DeadIdx).setIsDead(); - else - DefMI->addOperand(MachineOperand::CreateReg(li.reg, - /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true)); - LRStart = LRStart.getNextSlot(); - } -} - -/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially -/// extended by a dead copy. Mark the last use (if any) of the val# as kill as -/// ends the live range there. If there isn't another use, then this live range -/// is dead. Return true if live interval is removed. -bool -SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); - if (CopyIdx == SlotIndex()) { - // FIXME: special case: function live in. It can be a general case if the - // first instruction index starts at > 0 value. - assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); - // Live-in to the function but dead. Remove it from entry live-in set. 
- if (mf_->begin()->isLiveIn(li.reg)) - mf_->begin()->removeLiveIn(li.reg); - if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) - removeRange(li, LR->start, LR->end, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); - } - - LiveInterval::iterator LR = - li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex()); - if (LR == li.end()) - // Livein but defined by a phi. - return false; - - SlotIndex RemoveStart = LR->start; - SlotIndex RemoveEnd = CopyIdx.getStoreIndex(); - if (LR->end > RemoveEnd) - // More uses past this copy? Nothing to do. - return false; - - // If there is a last use in the same bb, we can't remove the live range. - // Shorten the live interval and return. - MachineBasicBlock *CopyMBB = CopyMI->getParent(); - if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) - return false; - - // There are other kills of the val#. Nothing to do. - if (!li.isOnlyLROfValNo(LR)) - return false; - - MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); - if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) - // If the live range starts in another mbb and the copy mbb is not a fall - // through mbb, then we can only cut the range from the beginning of the - // copy mbb. - RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex(); - - if (LR->valno->def == RemoveStart) { - // If the def MI defines the val# and this copy is the only kill of the - // val#, then propagate the dead marker. - PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); - ++numDeadValNo; - } - - removeRange(li, RemoveStart, RemoveEnd, li_, tri_); - return removeIntervalIfEmpty(li, li_, tri_); -} - /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. /// We need to be careful about coalescing a source physical register with a /// virtual register. Once the coalescing is done, it cannot be broken and these @@ -928,7 +745,7 @@ bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) return true; - if (DisablePhysicalJoin) { + if (!EnablePhysicalJoin) { DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); return false; } @@ -955,7 +772,7 @@ bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. if (!CP.isPartial()) { const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); if (Length > Threshold) { ++numAborts; @@ -974,7 +791,7 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const TargetRegisterClass *NewRC) { - unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); // This heuristics is good enough in practice, but it's obviously not *right*. // 4 is a magic number that works well enough for x86, ARM, etc. It filter // out all but the most restrictive register classes. 
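// ---------------------------------------------------------------------------
// Worked example for the density check in the hunk that follows (all of the
// numbers here are made up): "use density" is uses per instruction scaled by
// the size of the register class, and the cross-class join is rejected when
// the density would at least double for either side.
#include <assert.h>

static bool densityDoubles(unsigned NewUses, unsigned OldSize,
                           unsigned OldRCCount, unsigned OldUses,
                           unsigned NewSize, unsigned NewRCCount) {
  // Division-free form of:
  //   NewUses/(NewSize*NewRCCount) > 2 * OldUses/(OldSize*OldRCCount)
  return NewUses * OldSize * OldRCCount >
         2 * OldUses * NewSize * NewRCCount;
}

int main() {
  // 10 uses over 40 instrs in a 16-reg class; joining gives 18 uses over
  // 50 instrs in an 8-reg class. 18*40*16 = 11520 > 2*10*50*8 = 8000,
  // so this join would be rejected as too dense.
  assert(densityDoubles(18, 40, 16, 10, 50, 8));
  return 0;
}
// ---------------------------------------------------------------------------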
@@ -988,8 +805,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, LiveInterval &DstInt = li_->getInterval(DstReg); unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); unsigned DstSize = li_->getApproximateInstructionCount(DstInt); - if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + + // Coalesce aggressively if the intervals are small compared to the number of + // registers in the new class. The number 4 is fairly arbitrary, chosen to be + // less aggressive than the 8 used for the whole function size. + const unsigned ThresSize = 4 * NewRCCount; + if (SrcSize <= ThresSize && DstSize <= ThresSize) return true; + // Estimate *register use density*. If it doubles or more, abort. unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), mri_->use_nodbg_end()); @@ -997,13 +820,13 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, mri_->use_nodbg_end()); unsigned NewUses = SrcUses + DstUses; unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > NewRCCount) { - unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (SrcRC != NewRC && SrcSize > ThresSize) { + unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) return false; } - if (DstRC != NewRC && DstSize > NewRCCount) { - unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (DstRC != NewRC && DstSize > ThresSize) { + unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) return false; } @@ -1033,6 +856,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are already joined we continue. if (CP.getSrcReg() == CP.getDstReg()) { + markAsJoined(CopyMI); DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } @@ -1552,81 +1376,6 @@ void SimpleRegisterCoalescing::joinIntervals() { } } -/// Return true if the two specified registers belong to different register -/// classes. The registers may be either phys or virt regs. -bool -SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, - unsigned RegB) const { - // Get the register classes for the first reg. - if (TargetRegisterInfo::isPhysicalRegister(RegA)) { - assert(TargetRegisterInfo::isVirtualRegister(RegB) && - "Shouldn't consider two physregs!"); - return !mri_->getRegClass(RegB)->contains(RegA); - } - - // Compare against the regclass for the second reg. - const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA); - if (TargetRegisterInfo::isVirtualRegister(RegB)) { - const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB); - return RegClassA != RegClassB; - } - return !RegClassA->contains(RegB); -} - -/// lastRegisterUse - Returns the last (non-debug) use of the specific register -/// between cycles Start and End or NULL if there are no uses. 
-MachineOperand * -SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, - SlotIndex End, - unsigned Reg, - SlotIndex &UseIdx) const{ - UseIdx = SlotIndex(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - MachineOperand *LastUse = NULL; - for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg), - E = mri_->use_nodbg_end(); I != E; ++I) { - MachineOperand &Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI->isIdentityCopy()) - continue; - SlotIndex Idx = li_->getInstructionIndex(UseMI); - if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) { - LastUse = &Use; - UseIdx = Idx.getUseIndex(); - } - } - return LastUse; - } - - SlotIndex s = Start; - SlotIndex e = End.getPrevSlot().getBaseIndex(); - while (e >= s) { - // Skip deleted instructions - MachineInstr *MI = li_->getInstructionFromIndex(e); - while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) { - e = e.getPrevIndex(); - MI = li_->getInstructionFromIndex(e); - } - if (e < s || MI == NULL) - return NULL; - - // Ignore identity copies. - if (!MI->isIdentityCopy()) - for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { - MachineOperand &Use = MI->getOperand(i); - if (Use.isReg() && Use.isUse() && Use.getReg() && - tri_->regsOverlap(Use.getReg(), Reg)) { - UseIdx = e.getUseIndex(); - return &Use; - } - } - - e = e.getPrevIndex(); - } - - return NULL; -} - void SimpleRegisterCoalescing::releaseMemory() { JoinedCopies.clear(); ReMatCopies.clear(); @@ -1651,10 +1400,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (VerifyCoalescing) mf_->verify(this, "Before register coalescing"); - for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), - E = tri_->regclass_end(); I != E; ++I) - allocatableRCRegs_.insert(std::make_pair(*I, - tri_->getAllocatableSet(fn, *I))); + RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. if (EnableJoining) { @@ -1691,13 +1437,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // or else the scavenger may complain. LowerSubregs will // delete them later. DoDelete = false; - + if (MI->allDefsAreDead()) { - if (li_->hasInterval(SrcReg)) { - LiveInterval &li = li_->getInterval(SrcReg); - if (!ShortenDeadCopySrcLiveRange(li, MI)) - ShortenDeadCopyLiveRange(li, MI); - } + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + li_->hasInterval(SrcReg)) + li_->shrinkToUses(&li_->getInterval(SrcReg)); DoDelete = true; } if (!DoDelete) { @@ -1749,24 +1493,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DeadDefs.clear(); } - // If the move will be an identity move delete it - if (MI->isIdentityCopy()) { - unsigned SrcReg = MI->getOperand(1).getReg(); - if (li_->hasInterval(SrcReg)) { - LiveInterval &RegInt = li_->getInterval(SrcReg); - // If def of this move instruction is dead, remove its live range - // from the destination register's live interval. - if (MI->allDefsAreDead()) { - if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) - ShortenDeadCopyLiveRange(RegInt, MI); - } - } - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - continue; - } - ++mii; // Check for now unnecessary kill flags. 
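// ---------------------------------------------------------------------------
// Toy model of what the shrinkToUses calls above achieve for the dead-copy
// case (Range and the helper are invented, not the LiveIntervals API): once
// a copy's defs are all dead, the source interval no longer needs to reach
// the copy, so it is trimmed back to its last remaining real use.
#include <algorithm>
#include <vector>

struct Range { unsigned Start, End; }; // half-open [Start, End)

static void shrinkToUses(Range &LR, const std::vector<unsigned> &UseIdxs) {
  unsigned NewEnd = LR.Start; // no uses left: the range becomes empty
  for (unsigned i = 0, e = UseIdxs.size(); i != e; ++i)
    NewEnd = std::max(NewEnd, UseIdxs[i] + 1); // stay live through each use
  LR.End = std::min(LR.End, NewEnd);
}
// ---------------------------------------------------------------------------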
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 65cf542..92f6c64 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -17,7 +17,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/ADT/BitVector.h" +#include "RegisterClassInfo.h" namespace llvm { class SimpleRegisterCoalescing; @@ -47,8 +47,7 @@ namespace llvm { LiveDebugVariables *ldv_; const MachineLoopInfo* loopInfo; AliasAnalysis *AA; - - DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; + RegisterClassInfo RegClassInfo; /// JoinedCopies - Keep track of copies eliminated due to coalescing. /// @@ -103,10 +102,6 @@ namespace llvm { /// use this information below to update aliases. bool JoinIntervals(CoalescerPair &CP); - /// Return true if the two specified registers belong to different register - /// classes. The registers may be either phys or virt regs. - bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single @@ -124,13 +119,6 @@ namespace llvm { /// can transform the copy into a noop by commuting the definition. bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - /// TrimLiveIntervalToLastUse - If there is a last use in the same basic - /// block as the copy instruction, trim the ive interval to the last use - /// and return true. - bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx, - MachineBasicBlock *CopyMBB, - LiveInterval &li, const LiveRange *LR); - /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. @@ -156,16 +144,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(const CoalescerPair &CP); - /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. - /// Return true if live interval is removed. - bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); - - /// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially - /// extended by a dead copy. Mark the last use (if any) of the val# as kill - /// as ends the live range there. If there isn't another use, then this - /// live range is dead. Return true if live interval is removed. - bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI); - /// RemoveDeadDef - If a def of a live interval is now determined dead, /// remove the val# it defines. If the live interval becomes empty, remove /// it as well. @@ -175,11 +153,6 @@ namespace llvm { /// VNInfo copy flag for DstReg and all aliases. void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - /// lastRegisterUse - Returns the last use of the specific register between - /// cycles Start and End or NULL if there are no uses. - MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, - unsigned Reg, SlotIndex &LastUseIdx) const; - /// markAsJoined - Remember that CopyMI has already been joined. 
void markAsJoined(MachineInstr *CopyMI); }; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index aaa8568..92970e4 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -443,16 +443,17 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we go to a block that just does an unwind (which is the - // correct action for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + // value. By default, we issue a trap statement. + BasicBlock *TrapBlock = + BasicBlock::Create(F.getContext(), "trapbb", &F); + CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), + "", TrapBlock); + new UnreachableInst(F.getContext(), TrapBlock); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), @@ -519,7 +520,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Add a call to dispatch_setup after the setjmp call. This is expanded to any // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator()); + CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), @@ -561,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); + BranchInst::Create(TrapBlock, Unwinds[i]); Unwinds[i]->eraseFromParent(); } diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index cab18a1..6949618 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -135,13 +135,10 @@ struct SpillPlacement::Node { /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. /// Return the change to the total number of positive biases. - int addBias(float w, bool out) { + void addBias(float w, bool out) { // Normalize w relative to all connected blocks from that direction. w *= Scale[out]; - int Before = Bias > 0; Bias += w; - int After = Bias > 0; - return After - Before; } /// update - Recompute Value from Bias and Links. Return true when node @@ -230,14 +227,14 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - PositiveNodes += nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq * Bias[I->Entry], 1); } // Live-out from block? 
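// ---------------------------------------------------------------------------
// Standalone model of the Hopfield-node change above (HopfieldNode is
// illustrative): addBias now only accumulates; whether a node ends up
// preferring a register is rediscovered later by scanActiveBundles/iterate
// instead of being counted on the fly.
struct HopfieldNode {
  float Bias;     // sum of scaled block-frequency biases
  float Scale[2]; // per-direction normalization: [0] ingoing, [1] outgoing

  HopfieldNode() : Bias(0) { Scale[0] = Scale[1] = 1; }

  void addBias(float w, bool out) {
    w *= Scale[out]; // normalize relative to that direction's blocks
    Bias += w;
  }
  bool preferReg() const { return Bias > 0; } // checked in a separate scan
};
// ---------------------------------------------------------------------------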
if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - PositiveNodes += nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq * Bias[I->Exit], 0); } } } @@ -254,16 +251,42 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { continue; activate(ib); activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); float Freq = getBlockFrequency(Number); nodes[ib].addLink(ob, Freq, 1); nodes[ob].addLink(ib, Freq, 0); } } +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + /// iterate - Repeatedly update the Hopfield nodes until stability or the /// maximum number of iterations is reached. /// @param Linked - Numbers of linked nodes that need updating. -void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. + while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes); + if (Linked.empty()) return; @@ -279,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_reverse_iterator I = llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; // Scan forwards, skipping the first node which was just updated. @@ -290,38 +316,29 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_iterator I = llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; } } void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); // Reuse RegBundles as our ActiveNodes vector. ActiveNodes = &RegBundles; ActiveNodes->clear(); ActiveNodes->resize(bundles->getNumBundles()); - PositiveNodes = 0; } bool SpillPlacement::finish() { assert(ActiveNodes && "Call prepare() first"); - // Update all active nodes, and find the ones that are actually linked to - // something so their value may change when iterating. - SmallVector<unsigned, 8> Linked; - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { - nodes[n].update(nodes); - // A node that must spill, or a node without any links is not going to - // change its value ever again, so exclude it from iterations. - if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) - Linked.push_back(n); - } - - // Iterate the network to convergence. 
-  iterate(Linked);
 
   // Write preferences back to ActiveNodes.
   bool Perfect = true;
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 46e64e6..6952ad8 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -49,8 +49,12 @@ class SpillPlacement : public MachineFunctionPass {
   // caller.
   BitVector *ActiveNodes;
 
-  // The number of active nodes with a positive bias.
-  unsigned PositiveNodes;
+  // Nodes with active links. Populated by scanActiveBundles.
+  SmallVector<unsigned, 8> Linked;
+
+  // Nodes that went positive during the last call to scanActiveBundles or
+  // iterate.
+  SmallVector<unsigned, 8> RecentPositive;
 
   // Block frequencies are computed once. Indexed by block number.
   SmallVector<float, 4> BlockFrequency;
@@ -95,9 +99,20 @@ public:
   /// addLinks - Add transparent blocks with the given numbers.
   void addLinks(ArrayRef<unsigned> Links);
 
-  /// getPositiveNodes - Return the total number of graph nodes with a positive
-  /// bias after adding constraints.
-  unsigned getPositiveNodes() const { return PositiveNodes; }
+  /// scanActiveBundles - Perform an initial scan of all bundles activated by
+  /// addConstraints and addLinks, updating their state. Add all the bundles
+  /// that now prefer a register to RecentPositive.
+  /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+  bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or new bundles
+  /// are found.
+  void iterate();
+
+  /// getRecentPositive - Return an array of bundles that became positive during
+  /// the previous call to scanActiveBundles or iterate.
+  ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
 
   /// finish - Compute the optimal spill code placement given the
   /// constraints. No MustSpill constraints will be violated, and the smallest
@@ -120,7 +135,6 @@ private:
   virtual void releaseMemory();
 
   void activate(unsigned);
-  void iterate(const SmallVectorImpl<unsigned>&);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b89139f..b6bbcd7 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <set>
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 201e9b1..bf27cc8 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -30,6 +30,9 @@ using namespace llvm;
 
 STATISTIC(NumFinished, "Number of splits finished");
 STATISTIC(NumSimple,   "Number of splits that were simple");
+STATISTIC(NumCopies,   "Number of copies inserted for splitting");
+STATISTIC(NumRemats,   "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs,  "Number of invalid live ranges repaired");
 
 //===----------------------------------------------------------------------===//
 //                                 Split Analysis
 //===----------------------------------------------------------------------===//
@@ -51,6 +54,7 @@ void SplitAnalysis::clear() {
   UseBlocks.clear();
   ThroughBlocks.clear();
   CurLI = 0;
+  DidRepairRange = false;
 }
 
 SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
@@ -119,6 +123,8 @@ void SplitAnalysis::analyzeUses() {
   if (!calcLiveBlockInfo()) {
     // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
     // I am looking at you, SimpleRegisterCoalescing!
+    DidRepairRange = true;
+    ++NumRepairs;
     DEBUG(dbgs() << "*** Fixing inconsistent live interval!
***\n"); const_cast<LiveIntervals&>(LIS) .shrinkToUses(const_cast<LiveInterval*>(CurLI)); @@ -132,12 +138,14 @@ void SplitAnalysis::analyzeUses() { DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in " << UseBlocks.size() << " blocks, through " - << ThroughBlocks.size() << " blocks.\n"); + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = NumGapBlocks = 0; if (CurLI->empty()) return true; @@ -156,54 +164,63 @@ bool SplitAnalysis::calcLiveBlockInfo() { SlotIndex Start, Stop; tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - // LVI is the first live segment overlapping MBB. - BI.LiveIn = LVI->start <= Start; - if (!BI.LiveIn) - BI.Def = LVI->start; - - // Find the first and last uses in the block. - bool Uses = UseI != UseE && *UseI < Stop; - if (Uses) { + // If the block contains no uses, the range must be live through. At one + // point, SimpleRegisterCoalescing could create dangling ranges that ended + // mid-block. + if (UseI == UseE || *UseI >= Stop) { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + // The range shouldn't end mid-block if there are no uses. This shouldn't + // happen. + if (LVI->end < Stop) + return false; + } else { + // This block has uses. Find the first and last uses in the block. BI.FirstUse = *UseI; assert(BI.FirstUse >= Start); do ++UseI; while (UseI != UseE && *UseI < Stop); BI.LastUse = UseI[-1]; assert(BI.LastUse < Stop); - } - // Look for gaps in the live range. - bool hasGap = false; - BI.LiveOut = true; - while (LVI->end < Stop) { - SlotIndex LastStop = LVI->end; - if (++LVI == LVE || LVI->start >= Stop) { - BI.Kill = LastStop; - BI.LiveOut = false; - break; - } - if (LastStop < LVI->start) { - hasGap = true; - BI.Kill = LastStop; - BI.Def = LVI->start; + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + + // Look for gaps in the live range. + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.LiveOut = false; + BI.LastUse = LastStop; + break; + } + if (LastStop < LVI->start) { + // There is a gap in the live range. Create duplicate entries for the + // live-in snippet and the live-out snippet. + ++NumGapBlocks; + + // Push the Live-in part. + BI.LiveThrough = false; + BI.LiveOut = false; + UseBlocks.push_back(BI); + UseBlocks.back().LastUse = LastStop; + + // Set up BI for the live-out part. + BI.LiveIn = false; + BI.LiveOut = true; + BI.FirstUse = LVI->start; + } } - } - // Don't set LiveThrough when the block has a gap. - BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; - if (Uses) + // Don't set LiveThrough when the block has a gap. + BI.LiveThrough = BI.LiveIn && BI.LiveOut; UseBlocks.push_back(BI); - else - ThroughBlocks.push_back(BI.MBB->getNumber()); - // FIXME: This should never happen. The live range stops or starts without a - // corresponding use. An earlier pass did something wrong. - if (!BI.LiveThrough && !Uses) - return false; - - // LVI is now at LVE or LVI->end >= Stop. - if (LVI == LVE) - break; + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; + } // Live segment ends exactly at Stop. Move to the next segment. 
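// ---------------------------------------------------------------------------
// Compact model of the gap handling above (BlockInfo fields follow the diff;
// the helper is invented): a block where the range dies and is later
// redefined yields two UseBlocks entries, a live-in snippet ending at the
// last stop before the gap and a live-out snippet starting at the next def.
#include <vector>

struct BlockInfo {
  bool LiveIn, LiveOut, LiveThrough;
  unsigned FirstUse, LastUse;
};

static BlockInfo splitAtGap(std::vector<BlockInfo> &UseBlocks, BlockInfo BI,
                            unsigned LastStop, unsigned NextDef) {
  BlockInfo In = BI;        // push the live-in part
  In.LiveThrough = false;
  In.LiveOut = false;
  In.LastUse = LastStop;
  UseBlocks.push_back(In);

  BI.LiveIn = false;        // set up BI for the live-out part; the caller
  BI.LiveOut = true;        // pushes it once its own LastUse is known
  BI.FirstUse = NextDef;
  return BI;
}
// ---------------------------------------------------------------------------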
if (LVI->end == Stop && ++LVI == LVE) @@ -215,9 +232,34 @@ bool SplitAnalysis::calcLiveBlockInfo() { else MFI = LIS.getMBBFromIndex(LVI->start); } + + assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); return true; } +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } +} + bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { unsigned OrigReg = VRM.getOriginal(CurLI->reg); const LiveInterval &Orig = LIS.getInterval(OrigReg); @@ -348,8 +390,33 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + // VNInfo. + VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); + // When there were multiple different values, we may need new PHIs. + if (!VNI) + return updateSSA(); + + // Poor man's SSA update for the single-value case. + LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + LiveOutCache[MBB] = LOP; + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } + } +} + +/// findReachingDefs - Search the CFG for known live-out values. +/// Add required live-in blocks to LiveInBlocks. +VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill) { // Initialize the live-out cache the first time it is needed. if (LiveOutSeen.empty()) { unsigned N = VRM.getMachineFunction().getNumBlockIDs(); @@ -358,16 +425,15 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { } // Blocks where LI should be live-in. - SmallVector<MachineDomTreeNode*, 16> LiveIn; - LiveIn.push_back(MDT[IdxMBB]); + SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); // Remember if we have seen more than one value. bool UniqueVNI = true; - VNInfo *IdxVNI = 0; + VNInfo *TheVNI = 0; // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != LiveIn.size(); ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; assert(!MBB->pred_empty() && "Value live-in to entry block?"); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { @@ -377,9 +443,9 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { // Is this a known live-out block? 
if (LiveOutSeen.test(Pred->getNumber())) { if (VNInfo *VNI = LOP.first) { - if (IdxVNI && IdxVNI != VNI) + if (TheVNI && TheVNI != VNI) UniqueVNI = false; - IdxVNI = VNI; + TheVNI = VNI; } continue; } @@ -395,64 +461,50 @@ void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { LOP.first = VNI; if (VNI) { LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; - if (IdxVNI && IdxVNI != VNI) + if (TheVNI && TheVNI != VNI) UniqueVNI = false; - IdxVNI = VNI; + TheVNI = VNI; continue; } LOP.second = 0; // No, we need a live-in value for Pred as well - if (Pred != IdxMBB) - LiveIn.push_back(MDT[Pred]); + if (Pred != KillMBB) + WorkList.push_back(Pred); else - UniqueVNI = false; // Loopback to IdxMBB, ask updateSSA() for help. + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); } } - // We may need to add phi-def values to preserve the SSA form. - if (UniqueVNI) { - LiveOutPair LOP(IdxVNI, MDT[LIS.getMBBFromIndex(IdxVNI->def)]); - // Update LiveOutCache, but skip IdxMBB at LiveIn[0]. - for (unsigned i = 1, e = LiveIn.size(); i != e; ++i) - LiveOutCache[LiveIn[i]->getBlock()] = LOP; - } else - IdxVNI = updateSSA(RegIdx, LiveIn, Idx, IdxMBB); - - // Since we went through the trouble of a full BFS visiting all reaching defs, - // the values in LiveIn are now accurate. No more phi-defs are needed - // for these blocks, so we can color the live ranges. - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); - SlotIndex Start = LIS.getMBBStartIdx(MBB); - VNInfo *VNI = LiveOutCache[MBB].first; + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveInBlocks.clear(); + LiveInBlocks.reserve(WorkList.size()); + while(!WorkList.empty()) + LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); - // Anything in LiveIn other than IdxMBB is live-through. - // In IdxMBB, we should stop at Idx unless the same value is live-out. - if (MBB == IdxMBB && IdxVNI != VNI) - LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); - else - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - } + // The kill block may not be live-through. + assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); + LiveInBlocks.back().Kill = Kill; + + return UniqueVNI ? TheVNI : 0; } -VNInfo *SplitEditor::updateSSA(unsigned RegIdx, - SmallVectorImpl<MachineDomTreeNode*> &LiveIn, - SlotIndex Idx, - const MachineBasicBlock *IdxMBB) { +void SplitEditor::updateSSA() { // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. - LiveInterval *LI = Edit->get(RegIdx); - VNInfo *IdxVNI = 0; unsigned Changes; do { Changes = 0; // Propagate live-out values down the dominator tree, inserting phi-defs - // when necessary. Since LiveIn was created by a BFS, going backwards makes - // it more likely for us to visit immediate dominators before their - // children. - for (unsigned i = LiveIn.size(); i; --i) { - MachineDomTreeNode *Node = LiveIn[i-1]; + // when necessary. + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. 
+ if (!Node) + continue; MachineBasicBlock *MBB = Node->getBlock(); MachineDomTreeNode *IDom = Node->getIDom(); LiveOutPair IDomValue; @@ -461,9 +513,9 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, // This is probably an unreachable block that has survived somehow. bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - // IDom dominates all of our predecessors, but it may not be the immediate - // dominator. Check if any of them have live-out values that are properly - // dominated by IDom. If so, we need a phi-def here. + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { IDomValue = LiveOutCache[IDom->getBlock()]; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -481,40 +533,35 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, } } + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOutCache indicates a foreign or missing value. + LiveOutPair &LOP = LiveOutCache[MBB]; + // Create a phi-def if required. if (needPHI) { ++Changes; SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); VNI->setIsPHIDef(true); - // We no longer need LI to be live-in. - LiveIn.erase(LiveIn.begin()+(i-1)); - // Blocks in LiveIn are either IdxMBB, or have a value live-through. - if (MBB == IdxMBB) - IdxVNI = VNI; - // Check if we need to update live-out info. - LiveOutPair &LOP = LiveOutCache[MBB]; - if (LOP.second == Node || !LiveOutSeen.test(MBB->getNumber())) { - // We already have a live-out defined in MBB, so this must be IdxMBB. - assert(MBB == IdxMBB && "Adding phi-def to known live-out"); - LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); - } else { - // This phi-def is also live-out, so color the whole block. + I->Value = VNI; + // This block is done, we know the final value. + I->DomNode = 0; + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { - // No phi-def here. Remember incoming value for IdxMBB. - if (MBB == IdxMBB) { - IdxVNI = IDomValue.first; - // IdxMBB need not be live-out. - if (!LiveOutSeen.test(MBB->getNumber())) - continue; - } - assert(LiveOutSeen.test(MBB->getNumber()) && "Expected live-out block"); + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + if (I->Kill.isValid()) + continue; // Propagate IDomValue if needed: // MBB is live-out and doesn't define its own value. - LiveOutPair &LOP = LiveOutCache[MBB]; if (LOP.second != Node && LOP.first != IDomValue.first) { ++Changes; LOP = IDomValue; @@ -523,8 +570,20 @@ VNInfo *SplitEditor::updateSSA(unsigned RegIdx, } } while (Changes); - assert(IdxVNI && "Didn't find value for Idx"); - return IdxVNI; + // The values in LiveInBlocks are now accurate. No more phi-defs are needed + // for these blocks, so we can color the live ranges. 
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + if (!I->DomNode) + continue; + assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); + LI->addRange(LiveRange(Start, I->Kill.isValid() ? + I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); + } } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -536,15 +595,22 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, SlotIndex Def; LiveInterval *LI = Edit->get(RegIdx); + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; + // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); - Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getDefIndex(); + ++NumCopies; } // Define the value in Reg. @@ -554,9 +620,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } /// Create a new virtual register and live interval. -void SplitEditor::openIntv() { - assert(!OpenIdx && "Previous LI not closed before openIntv"); - +unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) Edit->create(LIS, VRM); @@ -564,6 +628,13 @@ void SplitEditor::openIntv() { // Create the open interval. OpenIdx = Edit->size(); Edit->create(LIS, VRM); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { @@ -686,18 +757,11 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { DEBUG(dump()); } -/// closeIntv - Indicate that we are done editing the currently open -/// LiveInterval, and ranges can be trimmed. -void SplitEditor::closeIntv() { - assert(OpenIdx && "openIntv not called before closeIntv"); - OpenIdx = 0; -} - -/// transferSimpleValues - Transfer all simply defined values to the new live -/// ranges. -/// Values that were rematerialized or that have multiple defs are left alone. -bool SplitEditor::transferSimpleValues() { +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need extendRange(). +bool SplitEditor::transferValues() { bool Skipped = false; + LiveInBlocks.clear(); RegAssignMap::const_iterator AssignI = RegAssign.begin(); for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { @@ -721,16 +785,98 @@ bool SplitEditor::transferSimpleValues() { RegIdx = 0; End = std::min(End, AssignI.start()); } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. 
DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { DEBUG(dbgs() << ':' << VNI->id); - Edit->get(RegIdx)->addRange(LiveRange(Start, End, VNI)); - } else + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip rematerialized values, we need to use extendRange() and + // extendPHIKillRanges() to completely recompute the live ranges. + if (Edit->didRematerialize(ParentVNI)) { + DEBUG(dbgs() << "(remat)"); Skipped = true; + Start = End; + continue; + } + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) { + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) { + // Live-out as well. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + } else { + // This block needs a live-in value. + LiveInBlocks.push_back(MDT[MBB]); + // The last block covered may not be live-out. + if (End < BlockEnd) + LiveInBlocks.back().Kill = End; + else { + // Live-out, but we need updateSSA to tell us the value. 
+ LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, + (MachineDomTreeNode*)0); + } + } + BlockStart = BlockEnd; + ++MBB; + } Start = End; } while (Start != ParentI->end); DEBUG(dbgs() << '\n'); } + + if (!LiveInBlocks.empty()) + updateSSA(); + return Skipped; } @@ -835,8 +981,7 @@ void SplitEditor::deleteRematVictims() { Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); } -void SplitEditor::finish() { - assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ++NumFinished; // At this point, the live intervals in Edit contain VNInfos corresponding to @@ -866,24 +1011,31 @@ void SplitEditor::finish() { assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); #endif - // Transfer the simply mapped values, check if any are complex. - bool Complex = transferSimpleValues(); - if (Complex) + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) extendPHIKillRanges(); else ++NumSimple; // Rewrite virtual registers, possibly extending ranges. - rewriteAssigned(Complex); + rewriteAssigned(Skipped); // Delete defs that were rematted everywhere. - if (Complex) + if (Skipped) deleteRematVictims(); // Get rid of unused values and set phi-kill flags. for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) (*I)->RenumberValues(LIS); + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + // Now check if any registers were separated into multiple components. ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { @@ -895,13 +1047,18 @@ void SplitEditor::finish() { DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); SmallVector<LiveInterval*, 8> dups; dups.push_back(li); - for (unsigned i = 1; i != NumComp; ++i) + for (unsigned j = 1; j != NumComp; ++j) dups.push_back(&Edit->create(LIS, VRM)); ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); } // Calculate spill weight and allocation hints for new intervals. Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + + assert(!LRMap || LRMap->size() == Edit->size()); } @@ -925,6 +1082,21 @@ bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { return !Blocks.empty(); } +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + } else { + // The last use is after the last valid split point. + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastUse); + } +} + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. 
void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { @@ -932,22 +1104,8 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - if (!Blocks.count(BI.MBB)) - continue; - - openIntv(); - SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); - SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, - LastSplitPoint)); - if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { - useIntv(SegStart, leaveIntvAfter(BI.LastUse)); - } else { - // The last use is after the last valid split point. - SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); - useIntv(SegStart, SegStop); - overlapIntv(SegStop, BI.LastUse); - } - closeIntv(); + if (Blocks.count(BI.MBB)) + splitSingleBlock(BI); } finish(); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 20ac8a1..7174c0b 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_SPLITKIT_H +#define LLVM_CODEGEN_SPLITKIT_H + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -60,17 +63,22 @@ public: /// 1. | o---x | Internal to block. Variable is only live in this block. /// 2. |---x | Live-in, kill. /// 3. | o---| Def, live-out. - /// 4. |---x o---| Live-in, kill, def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks. /// 5. |---o---o---| Live-through with uses or defs. - /// 6. |-----------| Live-through without uses. Transparent. + /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks. + /// + /// Two BlockInfo entries are created for template 4. One for the live-in + /// segment, and one for the live-out segment. These entries look as if the + /// block were split in the middle where the live range isn't live. + /// + /// Live-through blocks without any uses don't get BlockInfo entries. They + /// are simply listed in ThroughBlocks instead. /// struct BlockInfo { MachineBasicBlock *MBB; SlotIndex FirstUse; ///< First instr using current reg. SlotIndex LastUse; ///< Last instr using current reg. - SlotIndex Kill; ///< Interval end point inside block. - SlotIndex Def; ///< Interval start point inside block. - bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). + bool LiveThrough; ///< Live in whole block (Templ 5. above). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. }; @@ -88,8 +96,18 @@ private: /// UseBlocks - Blocks where CurLI has uses. SmallVector<BlockInfo, 8> UseBlocks; + /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where + /// the live range has a gap. + unsigned NumGapBlocks; + /// ThroughBlocks - Block numbers where CurLI is live through without uses. - SmallVector<unsigned, 8> ThroughBlocks; + BitVector ThroughBlocks; + + /// NumThroughBlocks - Number of live-through blocks. + unsigned NumThroughBlocks; + + /// DidRepairRange - analyze was forced to shrinkToUses(). + bool DidRepairRange; SlotIndex computeLastSplitPoint(unsigned Num); @@ -107,6 +125,11 @@ public: /// split. void analyze(const LiveInterval *li); + /// didRepairRange() - Returns true if CurLI was invalid and has been repaired + /// by analyze(). 
This really shouldn't happen, but sometimes the coalescer + /// can create live ranges that end in mid-air. + bool didRepairRange() const { return DidRepairRange; } + /// clear - clear all data structures so SplitAnalysis is ready to analyze a /// new interval. void clear(); @@ -133,11 +156,26 @@ public: /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks /// where CurLI has uses. - ArrayRef<BlockInfo> getUseBlocks() { return UseBlocks; } + ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; } + + /// getNumThroughBlocks - Return the number of through blocks. + unsigned getNumThroughBlocks() const { return NumThroughBlocks; } + + /// isThroughBlock - Return true if CurLI is live through MBB without uses. + bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } + + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } + + /// getNumLiveBlocks - Return the number of blocks where CurLI is live. + unsigned getNumLiveBlocks() const { + return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks(); + } - /// getThroughBlocks - Return an array of block numbers where CurLI is live - /// through without uses. - ArrayRef<unsigned> getThroughBlocks() { return ThroughBlocks; } + /// countLiveBlocks - Return the number of blocks where li is live. This is + /// guaranteed to return the same number as getNumLiveBlocks() after calling + /// analyze(li). + unsigned countLiveBlocks(const LiveInterval *li) const; typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; @@ -223,6 +261,30 @@ class SplitEditor { // entry in LiveOutCache. BitVector LiveOutSeen; + /// LiveInBlock - Info for updateSSA() about a block where a register is + /// live-in. + /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA() + /// adds the computed live-in value. + struct LiveInBlock { + // Dominator tree node for the block. + // Cleared by updateSSA when the final value has been determined. + MachineDomTreeNode *DomNode; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. + SlotIndex Kill; + + LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {} + }; + + /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and + /// updateSSA(). This list is usually empty, it exists here to avoid frequent + /// reallocations. + SmallVector<LiveInBlock, 16> LiveInBlocks; + /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within /// ParentVNI's live range in ParentLI. The new value is added to the value @@ -246,17 +308,22 @@ class SplitEditor { /// Insert PHIDefs as needed to preserve SSA form. void extendRange(unsigned RegIdx, SlotIndex Idx); - /// updateSSA - Insert PHIDefs as necessary and update LiveOutCache such that - /// Edit.get(RegIdx) is live-in to all the blocks in LiveIn. - /// Return the value that is eventually live-in to IdxMBB. - VNInfo *updateSSA(unsigned RegIdx, - SmallVectorImpl<MachineDomTreeNode*> &LiveIn, - SlotIndex Idx, - const MachineBasicBlock *IdxMBB); + /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all + /// reaching defs for LI are found. + /// @param LI Live interval whose value is needed. + /// @param MBB Block where LI should be live-in. + /// @param Kill Kill point in MBB. 
+ /// @return Unique value seen, or NULL. + VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB, + SlotIndex Kill); - /// transferSimpleValues - Transfer simply defined values to the new ranges. - /// Return true if any complex ranges were skipped. - bool transferSimpleValues(); + /// updateSSA - Compute and insert PHIDefs such that all blocks in + /// LiveInBlocks get a known live-in value. Add live ranges to the blocks. + void updateSSA(); + + /// transferValues - Transfer values to the new ranges. + /// Return true if any ranges were skipped. + bool transferValues(); /// extendPHIKillRanges - Extend the ranges of all values killed by original /// parent PHIDefs. @@ -278,7 +345,15 @@ public: void reset(LiveRangeEdit&); /// Create a new virtual register and live interval. - void openIntv(); + /// Return the interval index, starting from 1. Interval index 0 is the + /// implicit complement interval. + unsigned openIntv(); + + /// currentIntv - Return the current interval index. + unsigned currentIntv() const { return OpenIdx; } + + /// selectIntv - Select a previously opened interval index. + void selectIntv(unsigned Idx); /// enterIntvBefore - Enter the open interval before the instruction at Idx. /// If the parent interval is not live before Idx, a COPY is not inserted. @@ -321,22 +396,28 @@ public: /// void overlapIntv(SlotIndex Start, SlotIndex End); - /// closeIntv - Indicate that we are done editing the currently open - /// LiveInterval, and ranges can be trimmed. - void closeIntv(); - /// finish - after all the new live ranges have been created, compute the /// remaining live range, and rewrite instructions to use the new registers. - void finish(); + /// @param LRMap When not null, this vector will map each live range in Edit + /// back to the indices returned by openIntv. + /// There may be extra indices created by dead code elimination. + void finish(SmallVectorImpl<unsigned> *LRMap = 0); /// dump - print the current interval mapping to dbgs(). void dump() const; // ===--- High level methods ---=== + /// splitSingleBlock - Split CurLI into a separate live interval around the + /// uses in a single block. This is intended to be used as part of a larger + /// split, and doesn't call finish(). + void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); }; } + +#endif diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ec7829e..227eb47 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock( } // We now walk the PHIs in successor blocks and check for interferences. This - // is necesary because the use of a PHI's operands are logically contained in + // is necessary because the uses of a PHI's operands are logically contained in // the predecessor block. The def of a PHI's destination register is processed // along with the other defs in a basic block.
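The findReachingDefs/updateSSA split introduced in SplitKit above factors live range repair into a cheap common case and an iterative fallback: a reverse BFS from the block that needs a value collects every block that must become live-in, and only when that walk meets more than one reaching def does the dominator-tree phi placement run. The following is a minimal, self-contained C++ sketch of the BFS phase only; CFG, DefValue, and plain block numbers are hypothetical stand-ins for MachineBasicBlock, VNInfo, and LiveOutCache, not LLVM's actual types.

// Model of the two-phase scheme: findReachingDefs() answers "which single
// value reaches this block?", and only a multi-value answer forces the
// caller into the iterative updateSSA()-style phi placement.
#include <cstdio>
#include <set>
#include <vector>

struct CFG {
  std::vector<std::vector<int> > Preds; // Preds[b] = predecessors of block b.
  std::vector<int> DefValue;            // Value live-out of b, or 0 if none.
};

// Reverse BFS from UseBB. Fills LiveInBlocks with every block that needs a
// live-in value, in reverse discovery order (which tends to visit dominators
// before their children, the property the iterative pass relies on).
// Returns the unique reaching value, or 0 when several distinct values reach.
static int findReachingDefs(const CFG &G, int UseBB,
                            std::vector<int> &LiveInBlocks) {
  std::vector<int> WorkList(1, UseBB);
  std::set<int> Seen(WorkList.begin(), WorkList.end());
  int TheVal = 0;
  bool Unique = true;
  for (unsigned i = 0; i != WorkList.size(); ++i) {
    const std::vector<int> &Preds = G.Preds[WorkList[i]];
    for (unsigned p = 0; p != Preds.size(); ++p) {
      int Pred = Preds[p];
      if (G.DefValue[Pred]) {           // Known live-out value; stop here.
        if (TheVal && TheVal != G.DefValue[Pred])
          Unique = false;
        TheVal = G.DefValue[Pred];
        continue;
      }
      if (Seen.insert(Pred).second)     // Pred needs a live-in value too.
        WorkList.push_back(Pred);
    }
  }
  LiveInBlocks.assign(WorkList.rbegin(), WorkList.rend());
  return Unique ? TheVal : 0;
}

int main() {
  // Diamond CFG 0 -> {1,2} -> 3 with different defs in blocks 1 and 2.
  CFG G;
  G.Preds.resize(4);
  G.Preds[1].push_back(0);
  G.Preds[2].push_back(0);
  G.Preds[3].push_back(1);
  G.Preds[3].push_back(2);
  G.DefValue.assign(4, 0);
  G.DefValue[1] = 7;
  G.DefValue[2] = 8;

  std::vector<int> LiveIn;
  int V = findReachingDefs(G, 3, LiveIn);
  // Two values (7 and 8) reach block 3, so V is 0 and the caller would fall
  // back to the iterative pass and place a phi-def at the top of block 3.
  std::printf("unique reaching value: %d\n", V);
  return 0;
}

In the single-value case the caller skips the phi machinery entirely and just colors the collected blocks with that one value, which is the "poor man's SSA update" fast path seen in extendRange above.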
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 04d3d31..90cb72f 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -34,6 +34,7 @@ STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumAddedPHIs , "Number of phis added"); // Heuristic for tail duplication. static cl::opt<unsigned> @@ -80,16 +81,21 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies); + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &UsedByPhi, + bool Remove); void DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap); + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); + bool shouldTailDuplicate(const MachineFunction &MF, + MachineBasicBlock &TailBB); bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies); @@ -146,11 +152,11 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { - // This is not a hard error. dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); } if (PHIBB->getNumber() < 0) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; @@ -183,10 +189,6 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { if (NumTails == TailDupLimit) break; - // Only duplicate blocks that end with unconditional branches. - if (MBB->canFallThrough()) - continue; - // Save the successors list. SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), MBB->succ_end()); @@ -240,7 +242,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; - if (UseMI->getParent() == DefBB) + if (UseMI->getParent() == DefBB && !UseMI->isPHI()) continue; SSAUpdate.RewriteUse(UseMO); } @@ -271,6 +273,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { MadeChange = true; } } + NumAddedPHIs += NewPHIs.size(); return MadeChange; } @@ -293,6 +296,24 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { return 0; } + +// Remember which registers are used by phis in this block. This is +// used to determine which registers are liveout while modifying the +// block (which is why we need to copy the information). 
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB, + DenseSet<unsigned> *UsedByPhi) { + for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); + I != E; ++I) { + const MachineInstr &MI = *I; + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + unsigned SrcReg = MI.getOperand(i).getReg(); + UsedByPhi->insert(SrcReg); + } + } +} + /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for /// SSA update. void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -315,7 +336,9 @@ void TailDuplicatePass::ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) { + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, + bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -327,9 +350,12 @@ void TailDuplicatePass::ProcessPHI(MachineInstr *MI, // available value liveout of the block. unsigned NewDef = MRI->createVirtualRegister(RC); Copies.push_back(std::make_pair(NewDef, SrcReg)); - if (isDefLiveOut(DefReg, TailBB, MRI)) + if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) AddSSAUpdateEntry(DefReg, NewDef, PredBB); + if (!Remove) + return; + // Remove PredBB from the PHI node. MI->RemoveOperand(SrcOpIdx+1); MI->RemoveOperand(SrcOpIdx); @@ -343,7 +369,8 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap) { + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi) { MachineInstr *NewMI = TII->duplicate(MI, MF); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -357,7 +384,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, unsigned NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); LocalVRMap.insert(std::make_pair(Reg, NewReg)); - if (isDefLiveOut(Reg, TailBB, MRI)) + if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) AddSSAUpdateEntry(Reg, NewReg, PredBB); } else { DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); @@ -416,6 +443,13 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, // This register is defined in the tail block. for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; + // If we didn't duplicate a bb into a particular predecessor, we + // might still have added an entry to SSAUpdateVals to correctly + // recompute SSA. In that case, avoid adding a dummy extra argument + // to this PHI. + if (!SrcBB->isSuccessor(SuccBB)) + continue; + unsigned SrcReg = LI->second[j].second; if (Idx != 0) { II->getOperand(Idx).setReg(SrcReg); @@ -448,14 +482,19 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, } } -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each -/// of its predecessors. +/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies) { - // Set the limit on the number of instructions to duplicate, with a default - // of one less than the tail-merge threshold. When optimizing for size, +TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, + MachineBasicBlock &TailBB) { + // Only duplicate blocks that end with unconditional branches. + if (TailBB.canFallThrough()) + return false; + + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; @@ -465,49 +504,56 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; - if (PreRegAlloc) { - if (TailBB->empty()) - return false; - const TargetInstrDesc &TID = TailBB->back().getDesc(); - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches and returns. - if (!TID.isIndirectBranch() && !TID.isReturn()) - return false; - // If the target has hardware branch prediction that can handle indirect - // branches, duplicating them can often make them predictable when there - // are common paths through the code. The limit needs to be high enough - // to allow undoing the effects of tail merging and other optimizations - // that rearrange the predecessors of the indirect branch. - MaxDuplicateCount = 20; - } + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) - return false; + if (PreRegAlloc && !TailBB.empty()) { + const TargetInstrDesc &TID = TailBB.back().getDesc(); + if (TID.isIndirectBranch()) + MaxDuplicateCount = 20; + } // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - bool HasCall = false; - for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I) { + for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); + ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; + if (I->getDesc().isNotDuplicable()) + return false; + + // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) return false; - // Don't duplicate more than the threshold. - if (InstrCount == MaxDuplicateCount) return false; - // Remember if we saw a call. - if (I->getDesc().isCall()) HasCall = true; + if (PreRegAlloc && I->getDesc().isReturn()) + return false; + + // Avoid duplicating calls before register allocation. Calls present a + // barrier to register allocation, so duplicating them may end up increasing + // spills.
+ if (PreRegAlloc && I->getDesc().isCall()) + return false; + if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; + + if (InstrCount > MaxDuplicateCount) + return false; } - // Don't tail-duplicate calls before register allocation. Calls presents a - // barrier to register allocation so duplicating them may end up increasing - // spills. - if (InstrCount > 1 && (PreRegAlloc && HasCall)) + + return true; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. +bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { + if (!shouldTailDuplicate(MF, *TailBB)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); @@ -518,13 +564,17 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); - if (PredBB->succ_size() > 1) continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() > 1) + continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; @@ -532,9 +582,6 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, continue; if (!PredCond.empty()) continue; - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() != 1) - continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; @@ -557,11 +604,11 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. - ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. - DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); @@ -570,6 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } + + // Let AnalyzeBranch with AllowModify set simplify the branches just copied + // into PredBB. + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. @@ -590,12 +641,11 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; - bool PriorUnAnalyzable = - TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch.
- if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && + if (PrevBB->succ_size() == 1 && + !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); @@ -608,7 +658,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; - ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } @@ -618,7 +668,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; - DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); @@ -639,6 +689,57 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, Changed = true; } + // If this is after register allocation, there are no phis to fix. + if (!PreRegAlloc) + return Changed; + + // If we made no changes so far, we are safe. + if (!Changed) + return Changed; + + + // Handle the nasty case in which we duplicated a block that is part of a loop + // into some but not all of its predecessors. For example: + // 1 -> 2 <-> 3 | + // \ | + // \---> rest | + // if we duplicate 2 into 1 but not into 3, we end up with + // 12 -> 3 <-> 2 -> rest | + // \ / | + // \----->-----/ | + // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced + // with a phi in 3 (which now dominates 2). + // What we do here is introduce a copy in 3 of the register defined by the + // phi, just like when we are duplicating 2 into 3, but we don't copy any + // real instructions or remove the 3 -> 2 edge from the phi in 2. + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + continue; + + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + } + return Changed; } @@ -655,4 +756,3 @@ void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block.
MBB->eraseFromParent(); } - diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 15340a3..34e2b33 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -212,8 +212,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) return RC->contains(LiveOp.getReg()) ? RC : 0; - const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg); - if (RC == LiveRC || RC->hasSubClass(LiveRC)) + if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) return RC; // FIXME: Allow folding when register classes are memory compatible. @@ -388,11 +387,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, if (MO.isDef() != (i == 0)) return false; - // For the def, it should be the only def of that register. - if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || - MRI.isLiveIn(Reg))) - return false; - // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would lengthen the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f332d12..2da1bd4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -176,12 +177,59 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { SectionKind::getDataRel()); } +MCSymbol * +TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, + Mangler *Mang, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + switch (Encoding & 0x70) { + default: + report_fatal_error("We do not support this DWARF encoding yet!"); + case dwarf::DW_EH_PE_absptr: + return Mang->getSymbol(GV); + break; + case dwarf::DW_EH_PE_pcrel: { + return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + Mang->getSymbol(GV)->getName()); + break; + } + } +} + +void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + const MCSection *Sec = getContext().getELFSection(NameData, + ELF::SHT_PROGBITS, + Flags, + SectionKind::getDataRel(), + 0, Label->getName()); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(8); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + const MCExpr *E = MCConstantExpr::Create(8, getContext()); + Streamer.EmitELFSize(Label, E); + Streamer.EmitLabel(Label); + + unsigned Size = TM.getTargetData()->getPointerSize(); + Streamer.EmitSymbolValue(Sym, Size); +} + static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { - // FIXME: Why is this here? Codegen is should not be in the business
If the user wrote section(".eh_frame"), - // we should just pass that to MC which will defer to the assembly - // or use its default if producing an object file. + // N.B.: The defaults used in here are no the same ones used in MC. + // We follow gcc, MC follows gas. For example, given ".section .eh_frame", + // both gas and MC will produce a section with no flags. Given + // section(".eh_frame") gcc will produce + // .section .eh_frame,"a",@progbits if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names. @@ -207,9 +255,6 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { Name.startswith(".llvm.linkonce.tb.")) return SectionKind::getThreadBSS(); - if (Name == ".eh_frame") - return SectionKind::getDataRel(); - return K; } @@ -424,8 +469,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: @@ -438,26 +482,13 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { - // _foo.eh symbols are currently always exported so that the linker knows - // about them. This is not necessary on 10.6 and later, but it - // doesn't hurt anything. - // FIXME: I need to get this from Triple. - IsFunctionEHSymbolGlobal = true; IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; + // .comm doesn't support alignment before Leopard. Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); - if (T.getOS() == Triple::Darwin) { - switch (T.getDarwinMajorNumber()) { - case 7: // 10.3 Panther. - case 8: // 10.4 Tiger. - CommDirectiveSupportsAlignment = false; - break; - case 9: // 10.5 Leopard. - case 10: // 10.6 SnowLeopard. - break; - } - } + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) + CommDirectiveSupportsAlignment = false; TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -803,14 +834,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *TargetLoweringObjectFileMachO:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return SSym; +} + unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; } @@ -819,7 +872,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { return DW_EH_PE_pcrel; } -unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { +unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const { return DW_EH_PE_pcrel; } @@ -934,6 +987,20 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, getContext().getCOFFSection(".drectve", COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); + + PDataSection = + getContext().getCOFFSection(".pdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + + XDataSection = + getContext().getCOFFSection(".xdata", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); } const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { @@ -944,6 +1011,28 @@ const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { SectionKind::getDataRel()); } +const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection( + StringRef suffix) const { + if (suffix == "") + return PDataSection; + return getContext().getCOFFSection((".pdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + +const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection( + StringRef suffix) const { + if (suffix == "") + return XDataSection; + return getContext().getCOFFSection((".xdata"+suffix).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + static unsigned getCOFFSectionFlags(SectionKind K) { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 52ea872..f54d879 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1125,6 +1125,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { break; // The tied operands have been eliminated. } + bool IsEarlyClobber = false; bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; @@ -1132,7 +1133,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; - unsigned regA = mi->getOperand(DstIdx).getReg(); + + const MachineOperand &DstMO = mi->getOperand(DstIdx); + unsigned regA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + // Grab regB from the instruction because it may have changed if the // instruction was commuted. regB = mi->getOperand(SrcIdx).getReg(); @@ -1196,15 +1201,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } if (AllUsesCopied) { - // Replace other (un-tied) uses of regB with LastCopiedReg. 
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); } - MO.setReg(LastCopiedReg); } } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 48d8ab1..52693f0 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -196,8 +196,11 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { temp->eraseFromParent(); ModifiedPHI = true; - if (Input != Output) - F.getRegInfo().replaceRegWith(Output, Input); + if (Input != Output) { + MachineRegisterInfo &MRI = F.getRegInfo(); + MRI.constrainRegClass(Input, MRI.getRegClass(Output)); + MRI.replaceRegWith(Output, Input); + } continue; } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 7a7ea69..7557979 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -42,6 +42,7 @@ using namespace llvm; STATISTIC(NumSpills , "Number of register spills"); +STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); //===----------------------------------------------------------------------===// // VirtRegMap implementation @@ -260,6 +261,8 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { << "********** Function: " << MF->getFunction()->getName() << '\n'); DEBUG(dump()); + SmallVector<unsigned, 8> SuperDeads; + SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); @@ -283,12 +286,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. - if (MO.isUse() && MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - - // We don't have to deal with sub-register defs because - // LiveIntervalAnalysis already added the necessary <imp-def> - // operands. + if (MO.isUse()) { + if (MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + } else if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); @@ -305,10 +309,17 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { while (!SuperKills.empty()) MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + DEBUG(dbgs() << "> " << *MI); // Finally, remove any identity copies. 
if (MI->isIdentityCopy()) { + ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); RemoveMachineInstrFromMaps(MI); diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 67be1b0..1850658 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted"); STATISTIC(NumDRM , "Number of re-materializable defs elided"); STATISTIC(NumStores , "Number of stores added"); STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumOmitted , "Number of reloads omitted"); STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); STATISTIC(NumCopified, "Number of available reloads turned into copies"); STATISTIC(NumReMats , "Number of re-materialization"); @@ -669,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, } } -/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. /// static void ReMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MII, |

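The IsEarlyClobber guard added to TwoAddressInstructionPass above is worth spelling out. After a tied pair is lowered to "regA = COPY regB", the pass used to redirect any remaining un-tied reads of regB inside the instruction to the copied register; when the tied def is early-clobber, the def is written before the inputs are read, so an input must never share its register. Below is a minimal, self-contained C++ model of just that guard; Operand and the integer register encoding are illustrative stand-ins, not LLVM's MachineOperand.

#include <cstdio>
#include <vector>

// One machine operand: a def or a use of register Reg. The early-clobber
// flag only matters on defs.
struct Operand {
  bool IsDef;
  bool IsEarlyClobber;
  int Reg;
};

// After "regA = COPY regB" has been inserted for a tied pair, rewrite the
// remaining un-tied uses of SrcReg to NewReg -- unless any tied def is
// early-clobber, in which case the uses must keep their own register.
static void rewriteUntiedUses(std::vector<Operand> &MI, int SrcReg,
                              int NewReg) {
  bool IsEarlyClobber = false;
  for (unsigned i = 0, e = MI.size(); i != e; ++i)
    if (MI[i].IsDef)
      IsEarlyClobber |= MI[i].IsEarlyClobber;
  if (IsEarlyClobber)
    return; // Inputs must stay disjoint from an early-clobber def.
  for (unsigned i = 0, e = MI.size(); i != e; ++i)
    if (!MI[i].IsDef && MI[i].Reg == SrcReg)
      MI[i].Reg = NewReg;
}

int main() {
  // def reg1 (early-clobber, tied to the first use), use reg2, use reg2.
  std::vector<Operand> MI;
  Operand Def = { true, true, 1 };
  Operand Use = { false, false, 2 };
  MI.push_back(Def);
  MI.push_back(Use);
  MI.push_back(Use);

  rewriteUntiedUses(MI, 2, 1);
  // With the guard, the second use still reads reg2; without it, the use
  // would be redirected to reg1 and overlap the early-clobber def.
  std::printf("second use reads reg%d\n", MI[2].Reg);
  return 0;
}

Without the guard, a use rewritten to the early-clobber def's register would violate the very constraint early-clobber exists to express, which is the miscompile the hunk above prevents.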