path: root/lib/CodeGen
author     Stephen Hines <srhines@google.com>  2013-01-21 13:15:17 -0800
committer  Stephen Hines <srhines@google.com>  2013-01-21 13:15:17 -0800
commit     059800f9e3fee2852672f846d91a2da14da7783a (patch)
tree       a6ef16b7263252ae1b8069295ea9cbbae0d9467d /lib/CodeGen
parent     cbefa15de4821975bb99fc6d74b3bdb42b2df45c (diff)
parent     b6714227eda5d499f7667fc865f931126a8dc488 (diff)
Merge remote-tracking branch 'upstream/master' into merge-llvm

Conflicts:
    lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
    lib/MC/MCAssembler.cpp
    lib/Support/Atomic.cpp
    lib/Support/Memory.cpp
    lib/Target/ARM/ARMJITInfo.cpp

Change-Id: Ib339baf88df5b04870c8df1bedcfe1f877ccab8d
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 13
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 6
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 73
-rw-r--r--  lib/CodeGen/AllocationOrder.h | 52
-rw-r--r--  lib/CodeGen/Analysis.cpp | 51
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 85
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 156
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 50
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 255
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 15
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 36
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 217
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1019
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 489
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 40
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.cpp | 18
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 383
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 41
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 7
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 20
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 13
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 10
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 2
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 4
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 12
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 12
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 12
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 19
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 6
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 4
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 26
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 24
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 12
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 4
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 2
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 54
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 39
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 4
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 10
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 62
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 800
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 31
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 210
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 17
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 10
-rw-r--r--  lib/CodeGen/LiveRegMatrix.h | 148
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 9
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 18
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 20
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 158
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 6
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 16
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 82
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 46
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 138
-rw-r--r--  lib/CodeGen/MachineFunctionPass.cpp | 2
-rw-r--r--  lib/CodeGen/MachineFunctionPrinterPass.cpp | 6
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 463
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 86
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 18
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 4
-rw-r--r--  lib/CodeGen/MachineLoopRanges.cpp | 116
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 58
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp | 4
-rw-r--r--  lib/CodeGen/MachinePostDominators.cpp | 55
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 71
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 22
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 1720
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 16
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 82
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.h | 13
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 57
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 6
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 14
-rw-r--r--  lib/CodeGen/PHIEliminationUtils.cpp | 2
-rw-r--r--  lib/CodeGen/Passes.cpp | 51
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 26
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 51
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 60
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 6
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 8
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 4
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 4
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 15
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 106
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 25
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 23
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 20
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 1352
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 547
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 17
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 82
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 765
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 12
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1156
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 43
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 176
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 283
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 50
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 49
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 48
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 70
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 26
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 196
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 78
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 339
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 841
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 19
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 150
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 425
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp | 2
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 8
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 19
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 22
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  lib/CodeGen/Spiller.cpp | 6
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 4
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 146
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 26
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 12
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 8
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 28
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 3
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp (renamed from lib/CodeGen/TargetInstrInfoImpl.cpp) | 194
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 123
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp | 286
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 306
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 742
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 16
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 48
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 190
154 files changed, 10680 insertions, 6834 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 205480a..152d9fa 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -20,14 +20,13 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
@@ -616,7 +615,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
const TargetRegisterClass *SuperRC =
TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
if (Order.empty()) {
DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
@@ -635,7 +634,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
--R;
const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -818,7 +817,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ if (!MRI.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 7067784..6683630 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -18,15 +18,15 @@
#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
namespace llvm {
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 32ad34a..94754a0 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -14,10 +14,15 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "regalloc"
#include "AllocationOrder.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -25,55 +30,19 @@ using namespace llvm;
AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo)
- : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
- const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
- std::pair<unsigned, unsigned> HintPair =
- VRM.getRegInfo().getRegAllocationHint(VirtReg);
-
- // HintPair.second is a register, phys or virt.
- Hint = HintPair.second;
-
- // Translate to physreg, or 0 if not assigned yet.
- if (TargetRegisterInfo::isVirtualRegister(Hint))
- Hint = VRM.getPhys(Hint);
-
- // The first hint pair component indicates a target-specific hint.
- if (HintPair.first) {
- const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
- // The remaining allocation order may depend on the hint.
- ArrayRef<uint16_t> Order =
- TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
- VRM.getMachineFunction());
- if (Order.empty())
- return;
-
- // Copy the allocation order with reserved registers removed.
- OwnedBegin = true;
- unsigned *P = new unsigned[Order.size()];
- Begin = P;
- for (unsigned i = 0; i != Order.size(); ++i)
- if (!RCI.isReserved(Order[i]))
- *P++ = Order[i];
- End = P;
-
- // Target-dependent hints require resolution.
- Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
- VRM.getMachineFunction());
- } else {
- // If there is no hint or just a normal hint, use the cached allocation
- // order from RegisterClassInfo.
- ArrayRef<unsigned> O = RCI.getOrder(RC);
- Begin = O.begin();
- End = O.end();
- }
-
- // The hint must be a valid physreg for allocation.
- if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || RCI.isReserved(Hint)))
- Hint = 0;
-}
-
-AllocationOrder::~AllocationOrder() {
- if (OwnedBegin)
- delete [] Begin;
+ : Pos(0) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ rewind();
+
+ DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
}
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 0ce7e0c..a5293f6 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -17,21 +17,21 @@
#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
#define LLVM_CODEGEN_ALLOCATIONORDER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
namespace llvm {
class RegisterClassInfo;
class VirtRegMap;
class AllocationOrder {
- const unsigned *Begin;
- const unsigned *End;
- const unsigned *Pos;
- const RegisterClassInfo &RCI;
- unsigned Hint;
- bool OwnedBegin;
-public:
+ SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ int Pos;
- /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+public:
+ /// Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
/// @param RegClassInfo Information about reserved and allocatable registers.
@@ -39,32 +39,30 @@ public:
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
- ~AllocationOrder();
-
- /// next - Return the next physical register in the allocation order, or 0.
- /// It is safe to call next again after it returned 0.
- /// It will keep returning 0 until rewind() is called.
+ /// Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next() again after it returned 0, it will keep
+ /// returning 0 until rewind() is called.
unsigned next() {
- // First take the hint.
- if (!Pos) {
- Pos = Begin;
- if (Hint)
- return Hint;
- }
- // Then look at the order from TRI.
- while (Pos != End) {
- unsigned Reg = *Pos++;
- if (Reg != Hint)
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ while (Pos < int(Order.size())) {
+ unsigned Reg = Order[Pos++];
+ if (!isHint(Reg))
return Reg;
}
return 0;
}
- /// rewind - Start over from the beginning.
- void rewind() { Pos = 0; }
+ /// Start over from the beginning.
+ void rewind() { Pos = -int(Hints.size()); }
+
+ /// Return true if the last register returned from next() was a preferred register.
+ bool isHint() const { return Pos <= 0; }
- /// isHint - Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+ /// Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const {
+ return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+ }
};
} // end namespace llvm
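
Note on the AllocationOrder rewrite above: hint computation now lives in
TargetRegisterInfo::getRegAllocationHints(), and iteration drains the hint
list first (Pos counts up from -Hints.size() to 0) before walking the cached
class order, skipping anything already returned as a hint. A minimal sketch
of an allocator loop consuming the new interface; tryAssign() is a
hypothetical helper, not part of this patch:

    AllocationOrder Order(VirtReg, VRM, RegClassInfo);
    while (unsigned PhysReg = Order.next()) {
      // isHint() stays true while next() is still draining the target's
      // preferred-register list.
      bool Preferred = Order.isHint();
      if (tryAssign(VirtReg, PhysReg, Preferred))  // hypothetical helper
        break;
    }
    Order.rewind(); // restart from the hints; next() may be called again
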
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 447f398..aaba144 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -13,19 +13,18 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -79,7 +78,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
@@ -91,7 +90,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
StartingOffset + i * EltSize);
@@ -266,7 +265,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
const TargetLowering &TLI) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
@@ -313,12 +312,14 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
+ CallerRetAttr.hasAttribute(Attribute::SExt))
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
@@ -346,21 +347,3 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return true;
}
-
-bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- SDValue &Chain, const TargetLowering &TLI) {
- const Function *F = DAG.getMachineFunction().getFunction();
-
- // Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (CallerRetAttr & ~Attribute::NoAlias)
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
- return false;
-
- // Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node, Chain);
-}
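
Note on the isInTailCallPosition() change above: the old code XOR-ed
Attributes bitmasks, which no longer works once attributes stop being a plain
bitmask; the new code compares AttrBuilder sets with noalias stripped and
queries extensions via hasAttribute(). The consolidated post-patch check, as
a sketch using only calls visible in the hunks (F and CalleeRetAttr as in the
function):

    Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
    // Require matching return attributes, ignoring noalias, which does not
    // affect the call sequence.
    if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
        AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
      return false;
    // Dropping a sign/zero extension of the return value is not safe.
    if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
        CallerRetAttr.hasAttribute(Attribute::SExt))
      return false;

The SelectionDAG overload at the end of the file is deleted outright.
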
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index bf5d8c4..1728331 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,29 +12,29 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
cl::opt<bool>
@@ -69,24 +69,69 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
- // Emit references to personality.
- if (const Function * Personality =
- MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
- MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
- Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
- Asm->OutStreamer.EmitPersonality(PerSym);
- }
-
if (EnableARMEHABIDescriptors) {
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ if (!MMI->getLandingPads().empty()) {
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ Asm->OutStreamer.EmitHandlerData();
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
}
Asm->OutStreamer.EmitFnEnd();
}
+
+void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b30b427..cbcc290 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,8 +15,10 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,7 +26,10 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -32,21 +37,16 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Timer.h"
-#include <ctype.h>
using namespace llvm;
static const char *DWARFGroupName = "DWARF Emission";
@@ -68,7 +68,7 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
@@ -91,9 +91,6 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
return NumBits;
}
-
-
-
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
@@ -131,10 +128,9 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return TM.getTargetLowering()->getObjFileLowering();
}
-
-/// getTargetData - Return information about data layout.
-const TargetData &AsmPrinter::getTargetData() const {
- return *TM.getTargetData();
+/// getDataLayout - Return information about data layout.
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return *TM.getDataLayout();
}
/// getCurrentSection() - Return the current section we are emitting to.
@@ -154,6 +150,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool AsmPrinter::doInitialization(Module &M) {
+ OutStreamer.InitStreamer();
+
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MMI->AnalyzeModule(M);
@@ -161,7 +159,7 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(OutContext, *TM.getTargetData());
+ Mang = new Mangler(OutContext, *TM.getDataLayout());
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -281,7 +279,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -313,8 +311,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- if (Align == 1 ||
- MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+ // cases (e.g. when Align == 1), the external assembler might enforce
+ // some -unknown- default alignment behavior, which could cause
+ // spurious differences between external and integrated assembler.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
return;
@@ -944,6 +947,8 @@ bool AsmPrinter::doFinalization(Module &M) {
MMI = 0;
OutStreamer.Finish();
+ OutStreamer.reset();
+
return false;
}
@@ -992,7 +997,7 @@ void AsmPrinter::EmitConstantPool() {
Kind = SectionKind::getReadOnlyWithRelLocal();
break;
case 0:
- switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
case 4: Kind = SectionKind::getMergeableConst4(); break;
case 8: Kind = SectionKind::getMergeableConst8(); break;
case 16: Kind = SectionKind::getMergeableConst16();break;
@@ -1038,7 +1043,7 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
Type *Ty = CPE.getType();
- Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+ Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
OutStreamer.EmitLabel(GetCPISymbol(CPI));
if (CPE.isMachineConstantPoolEntry())
@@ -1081,7 +1086,12 @@ void AsmPrinter::EmitJumpTableInfo() {
JTInDiffSection = true;
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1123,6 +1133,8 @@ void AsmPrinter::EmitJumpTableInfo() {
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
@@ -1190,7 +1202,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
- unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
}
@@ -1292,7 +1304,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
unsigned Align = Log2_32(TD->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(), priority_order);
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
@@ -1408,7 +1420,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// if required for correctness.
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
- if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1447,10 +1459,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using TargetData as a
+ // opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
return lowerConstant(C, AP);
@@ -1464,21 +1476,16 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- const Constant *PtrVal = CE->getOperand(0);
- SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
- if (Offset == 0)
+ if (!OffsetAI)
return Base;
- // Truncate/sext the offset to the pointer size.
- unsigned Width = TD.getPointerSizeInBits();
- if (Width < 64)
- Offset = SignExtend64(Offset, Width);
-
+ int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
@@ -1493,7 +1500,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
@@ -1503,7 +1510,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
}
case Instruction::PtrToInt: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -1577,7 +1584,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64) return -1;
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1608,7 +1615,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
-
+
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
return isRepeatedByteSequence(CDS);
@@ -1617,16 +1624,16 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
unsigned AddrSpace,AsmPrinter &AP){
-
+
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
}
-
+
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
@@ -1650,7 +1657,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
float F;
uint32_t I;
};
-
+
F = CDS->getElementAsFloat(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
@@ -1663,7 +1670,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
double F;
uint64_t I;
};
-
+
F = CDS->getElementAsDouble(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
@@ -1671,7 +1678,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
unsigned Size = TD.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1687,7 +1694,7 @@ static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
int Value = isRepeatedByteSequence(CA, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
}
else {
@@ -1701,7 +1708,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV,
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
unsigned Size = TD.getTypeAllocSize(CV->getType());
unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1712,7 +1719,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV,
static void emitGlobalConstantStruct(const ConstantStruct *CS,
unsigned AddrSpace, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
unsigned Size = TD->getTypeAllocSize(CS->getType());
const StructLayout *Layout = TD->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
@@ -1777,31 +1784,34 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
return;
}
- if (CFP->getType()->isX86_FP80Ty()) {
+ if (CFP->getType()->isX86_FP80Ty() || CFP->getType()->isFP128Ty()) {
// all long double variants are printed as hex
// API needed to prevent premature destruction
APInt API = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = API.getRawData();
if (AP.isVerbose()) {
// Convert to double so we can print the approximate val as a comment.
- APFloat DoubleVal = CFP->getValueAPF();
- bool ignored;
- DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
- AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
- << DoubleVal.convertToDouble() << '\n';
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
+
+ const char *TyNote = CFP->getType()->isFP128Ty() ? "fp128 " : "x86_fp80 ";
+ AP.OutStreamer.GetCommentOS() << TyNote << StrVal << '\n';
}
- if (AP.TM.getTargetData()->isBigEndian()) {
- AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ // The 80-bit type is made of a 64-bit and 16-bit value, the 128-bit has 2
+ // 64-bit words.
+ uint32_t TrailingSize = CFP->getType()->isFP128Ty() ? 8 : 2;
+
+ if (AP.TM.getDataLayout()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
} else {
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace);
}
// Emit the tail padding for the long double.
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
TD.getTypeStoreSize(CFP->getType()), AddrSpace);
return;
@@ -1813,7 +1823,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
// API needed to prevent premature destruction.
APInt API = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = API.getRawData();
- if (AP.TM.getTargetData()->isBigEndian()) {
+ if (AP.TM.getDataLayout()->isBigEndian()) {
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
} else {
@@ -1824,7 +1834,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
static void emitGlobalConstantLargeInt(const ConstantInt *CI,
unsigned AddrSpace, AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
@@ -1840,7 +1850,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI,
static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
uint64_t Size = TD->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
@@ -1872,7 +1882,7 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
-
+
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
return emitGlobalConstantArray(CVA, AddrSpace, AP);
@@ -1894,10 +1904,10 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return emitGlobalConstantImpl(New, AddrSpace, AP);
}
}
-
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return emitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
@@ -1905,7 +1915,7 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, AddrSpace, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
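
Note on the lowerConstant() GEP case above: rather than collecting the index
operands and calling TD.getIndexedOffset() plus a manual SignExtend64, the
patch lets GEPOperator accumulate the byte offset into a pointer-width APInt.
The consolidated post-patch logic, a sketch with CE, AP, and Ctx as in
lowerConstant():

    const DataLayout &TD = *AP.TM.getDataLayout();
    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
    const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
    if (!OffsetAI) // offset folded to zero: the base expression suffices
      return Base;
    // OffsetAI is already pointer-width, so getSExtValue() carries the
    // right sign without explicit truncation or extension.
    return MCBinaryExpr::CreateAdd(
        Base, MCConstantExpr::Create(OffsetAI.getSExtValue(), Ctx), Ctx);
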
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 90d511c..ece92d8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -13,19 +13,19 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -53,7 +53,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
void AsmPrinter::EmitCFAByte(unsigned Val) const {
if (isVerbose()) {
if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
- OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
Twine(Val-dwarf::DW_CFA_offset) + ")");
else
OutStreamer.AddComment(dwarf::CallFrameString(Val));
@@ -83,7 +83,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
return "indirect pcrel sdata8";
}
-
+
return "<unknown encoding>";
}
@@ -101,7 +101,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
OutStreamer.AddComment(Twine("Encoding = ") +
DecodeDWARFEncoding(Val));
}
-
+
OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
}
@@ -109,30 +109,26 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
if (Encoding == dwarf::DW_EH_PE_omit)
return 0;
-
+
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
- case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
case dwarf::DW_EH_PE_udata8: return 8;
}
}
-void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
- OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
-}
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ } else
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding), 0);
}
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -149,22 +145,22 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
-
+
// Get the section that we're referring to, based on SectionLabel.
const MCSection &Section = SectionLabel->getSection();
-
+
// If Label has already been emitted, verify that it is in the same section as
// section label for sanity.
assert((!Label->isInSection() || &Label->getSection() == &Section) &&
"Section offset using wrong section base for label");
-
+
// If the section in question will end up with an address of 0 anyway, we can
// just emit an absolute reference to save a relocation.
if (Section.isBaseAddressKnownZero()) {
OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
return;
}
-
+
// Otherwise, emit it as a label difference from the start of the section.
EmitLabelDifference(Label, SectionLabel, 4);
}
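
Note on EmitTTypeReference() above: the two EmitReference() overloads
collapse into a single entry point that also accepts a null GlobalValue,
emitting GetSizeOfEncodedValue(Encoding) bytes of zero instead of a symbol
expression in that case. This is what lets ARMException::EmitTypeInfos
(earlier in this patch) pass 0 for catch-all filter entries; the call from
that hunk:

    // A TypeID of 0 means "catch all": the null GV becomes an encoded zero
    // rather than a reference to a type_info symbol.
    Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
                            TTypeEncoding);
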
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index b26ffeb..4890627 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -13,32 +13,32 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
struct SrcMgrDiagInfo {
const MDNode *LocInfo;
- LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ LLVMContext::DiagHandlerTy DiagHandler;
void *DiagContext;
};
}
@@ -89,15 +89,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
SourceMgr SrcMgr;
SrcMgrDiagInfo DiagInfo;
- // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ // If the current LLVMContext has a diagnostic handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
- if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ if (LLVMCtx.getDiagnosticHandler() != 0) {
// If the source manager has an issue, we arrange for srcMgrDiagHandler
// to be invoked, getting DiagInfo passed into it.
DiagInfo.LocInfo = LocMDNode;
- DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo.DiagHandler = LLVMCtx.getDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getDiagnosticContext();
SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
HasDiagHandler = true;
}
@@ -139,81 +139,116 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
#endif // ANDROID_TARGET_BUILD
}
-
-/// EmitInlineAsm - This method formats and emits the specified machine
-/// instruction that is an inline asm.
-void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
#ifndef ANDROID_TARGET_BUILD
- assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- // Count the number of register definitions to find the asm string.
- unsigned NumDefs = 0;
- for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
- ++NumDefs)
- assert(NumDefs != NumOperands-2 && "No asm string?");
-
- assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
- // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
- // If this asmstr is empty, just print the #APP/#NOAPP markers.
- // These are useful to see where empty asm's wound up.
- if (AsmStr[0] == 0) {
- // Don't emit the comments if writing to a .o file.
- if (!OutStreamer.hasRawTextSupport()) return;
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$':
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ }
+ if (Done) break;
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmStart());
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmEnd());
- return;
- }
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
- // Emit the #APP start marker. This has to happen even if verbose-asm isn't
- // enabled, so we use EmitRawText.
- if (OutStreamer.hasRawTextSupport())
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmStart());
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
- // Get the !srcloc metadata node if we have it, and decode the loc cookie from
- // it.
- unsigned LocCookie = 0;
- const MDNode *LocMD = 0;
- for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata() &&
- (LocMD = MI->getOperand(i-1).getMetadata()) &&
- LocMD->getNumOperands() != 0) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
- LocCookie = CI->getZExtValue();
- break;
- }
- }
- }
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
- // Emit the inline asm to a temporary string so we can emit it through
- // EmitInlineAsm.
- SmallString<256> StringData;
- raw_svector_ostream OS(StringData);
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
- OS << '\t';
+ bool Error = false;
- // The variant of the current asmprinter.
- int AsmPrinterVariant = MAI->getAssemblerDialect();
- int InlineAsmVariant = MI->getInlineAsmDialect();
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
- // Switch to the inline assembly variant.
- if (AsmPrinterVariant != InlineAsmVariant) {
- if (InlineAsmVariant == 0)
- OS << ".att_syntax\n\t";
- else
- OS << ".intel_syntax\n\t";
+ // We may have location metadata attached to the end of the
+ // instruction, and at no point should we see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ break;
+ }
+ }
}
+ OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
+#endif // ANDROID_TARGET_BUILD
+}
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ int AsmPrinterVariant, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
+#ifndef ANDROID_TARGET_BUILD
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ OS << '\t';
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -286,7 +321,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
" string: '" + Twine(AsmStr) + "'");
std::string Val(StrStart, StrEnd);
- PrintSpecial(MI, OS, Val.c_str());
+ AP->PrintSpecial(MI, OS, Val.c_str());
LastEmitted = StrEnd+1;
break;
}
@@ -354,7 +389,6 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// FIXME: What if the operand isn't an MBB, report error?
OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
else {
- AsmPrinter *AP = const_cast<AsmPrinter*>(this);
if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
Modifier[0] ? Modifier : 0,
@@ -376,15 +410,76 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
}
}
}
- // Switch to the AsmPrinter variant.
- if (AsmPrinterVariant != InlineAsmVariant) {
- if (AsmPrinterVariant == 0)
- OS << "\n\t.att_syntax";
- else
- OS << "\n\t.intel_syntax";
+ OS << '\n' << (char)0; // null terminate string.
+#endif // ANDROID_TARGET_BUILD
+}
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+#ifndef ANDROID_TARGET_BUILD
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asm string is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asms wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
}
- OS << '\n' << (char)0; // null terminate string.
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsmVariant == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+ AP, LocCookie, OS);
+ else
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
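For readers following the string scan in the hunks above: GCC-dialect inline asm strings reference operands as $N, escape a literal dollar as $$, and wrap modified references as ${N:modifier}. A minimal standalone sketch of just the $-escape scan (modifiers, variants, and diagnostics omitted; names are illustrative, not LLVM API):

#include <cctype>
#include <string>

// Expand "$$" to a literal '$' and "$<digit>" to an operand placeholder;
// all other characters pass through unchanged.
static std::string expandDollarEscapes(const std::string &Asm) {
  std::string Out;
  for (size_t i = 0; i < Asm.size(); ++i) {
    if (Asm[i] != '$') { Out += Asm[i]; continue; }
    if (i + 1 < Asm.size() && Asm[i + 1] == '$') { Out += '$'; ++i; continue; }
    if (i + 1 < Asm.size() && isdigit((unsigned char)Asm[i + 1])) {
      Out += "<operand "; Out += Asm[++i]; Out += '>';
      continue;
    }
    Out += '$'; // Trailing or unrecognized '$' is kept as-is.
  }
  return Out;
}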
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 0885285..a913ca4 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -8,16 +8,16 @@
//===----------------------------------------------------------------------===//
//
// Data structures for DWARF info entries.
-//
+//
//===----------------------------------------------------------------------===//
#include "DIE.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -197,9 +197,11 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
- case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
+ case dwarf::DW_FORM_addr:
+ Size = Asm->getDataLayout().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
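The DW_FORM_udata and DW_FORM_sdata cases above hand off to the streamer's LEB128 emitters. As a minimal sketch of what ULEB128 encoding does, independent of the MC layer (the helper name is illustrative):

#include <cstdint>
#include <vector>

// Emit Value as ULEB128: 7 payload bits per byte, high bit set on every
// byte except the last.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // More bytes follow.
    Out.push_back(Byte);
  } while (Value != 0);
}

For example, 624485 encodes to the three bytes 0xe5 0x8e 0x26.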
@@ -219,9 +221,10 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -248,7 +251,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getTargetData().getPointerSize();
+ return AP->getDataLayout().getPointerSize();
}
#ifndef NDEBUG
@@ -272,7 +275,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getTargetData().getPointerSize();
+ return AP->getDataLayout().getPointerSize();
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index f93ea1b..35d7959 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// Data structures for DWARF info entries.
-//
+//
//===----------------------------------------------------------------------===//
#ifndef CODEGEN_ASMPRINTER_DIE_H__
@@ -131,7 +131,7 @@ namespace llvm {
DIE *Parent;
- /// Attributes values.
+ /// Attribute values.
///
SmallVector<DIEValue*, 32> Values;
@@ -155,7 +155,7 @@ namespace llvm {
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
-
+
/// addValue - Add a value and attributes to a DIE.
///
void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
@@ -214,9 +214,6 @@ namespace llvm {
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *) { return true; }
-
#ifndef NDEBUG
virtual void print(raw_ostream &O) = 0;
void dump();
@@ -257,7 +254,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEInteger *) { return true; }
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
#ifndef NDEBUG
@@ -286,7 +282,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIELabel *) { return true; }
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
#ifndef NDEBUG
@@ -313,7 +308,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEDelta *) { return true; }
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
#ifndef NDEBUG
@@ -343,7 +337,6 @@ namespace llvm {
}
// Implement isa/cast/dyncast.
- static bool classof(const DIEEntry *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
#ifndef NDEBUG
@@ -383,7 +376,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEBlock *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
#ifndef NDEBUG
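The one-argument classof overloads deleted above (e.g. classof(const DIEInteger*)) were a remnant of an older isa<>/dyn_cast<> implementation and are no longer consulted; only the base-pointer predicate remains. A self-contained sketch of the tag-based pattern (type names here are illustrative, not the real DIE classes):

// LLVM-style RTTI: the base class carries a kind tag, and each subclass
// provides one classof(const Base*) predicate for isa<>/dyn_cast<>.
struct ValueBase {
  enum Kind { IntegerKind, LabelKind };
  explicit ValueBase(Kind K) : TheKind(K) {}
  Kind getKind() const { return TheKind; }
private:
  Kind TheKind;
};

struct IntegerValue : ValueBase {
  IntegerValue() : ValueBase(IntegerKind) {}
  static bool classof(const ValueBase *V) {
    return V->getKind() == IntegerKind;
  }
};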
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 454a923..c193999 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "DwarfAccelTable.h"
-#include "DwarfDebug.h"
#include "DIE.h"
-#include "llvm/ADT/Twine.h"
+#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
@@ -32,7 +32,7 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
case eAtomTypeTag: return "eAtomTypeTag";
case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
- }
+ }
llvm_unreachable("invalid AtomType!");
}
@@ -133,8 +133,8 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
}
}
-// Walk through and emit the buckets for the table. This will look
-// like a list of numbers of how many elements are in each bucket.
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
unsigned index = 0;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
@@ -155,7 +155,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
HE = Buckets[i].end(); HI != HE; ++HI) {
Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
Asm->EmitInt32((*HI)->HashValue);
- }
+ }
}
}
@@ -181,7 +181,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -190,7 +190,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
- D->getStringPool());
+ D->getStringPoolSym());
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.size());
for (ArrayRef<HashDataContents*>::const_iterator
@@ -215,7 +215,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
// Emit the entire data structure to the output file.
void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
- DwarfDebug *D) {
+ DwarfUnits *D) {
// Emit the header.
EmitHeader(Asm);
@@ -258,7 +258,7 @@ void DwarfAccelTable::print(raw_ostream &O) {
for (std::vector<HashData*>::const_iterator
DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
(*DI)->print(O);
-
+
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 963b8cd..9915bca 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -14,18 +14,18 @@
#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
-#include "llvm/ADT/StringMap.h"
+#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
-#include "DIE.h"
-#include <vector>
#include <map>
+#include <vector>
// The dwarf accelerator tables are an indirect hash table optimized
// for null lookup rather than access to known data. They are output into
@@ -51,7 +51,7 @@
// section contains all of the 32-bit hash values in contiguous memory, and
// the offsets contain the offset into the data area for the particular
// hash.
-//
+//
// For a lookup example, we could hash a function name and take it modulo the
// number of buckets giving us our bucket. From there we take the bucket value
// as an index into the hashes table and look at each successive hash as long
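To make the lookup scheme described in that comment concrete, here is an illustrative reader sketch (field names are invented; the real on-disk format also carries the header and atom descriptions):

#include <cstdint>
#include <vector>

// Hash the key, take it modulo the bucket count, then scan the contiguous
// hash array starting at that bucket's index while the hashes still map to
// the same bucket.
struct AccelTableView {
  uint32_t BucketCount;
  std::vector<uint32_t> BucketIndex; // per-bucket start index into Hashes
  std::vector<uint32_t> Hashes;      // all 32-bit hashes, bucket-ordered
  std::vector<uint32_t> Offsets;     // data-area offset per hash

  // Return the data offset for HashValue, or UINT32_MAX if absent.
  uint32_t lookup(uint32_t HashValue) const {
    uint32_t Bucket = HashValue % BucketCount;
    for (uint32_t i = BucketIndex[Bucket]; i < Hashes.size(); ++i) {
      if (Hashes[i] % BucketCount != Bucket)
        break; // Ran off the end of this bucket.
      if (Hashes[i] == HashValue)
        return Offsets[i];
    }
    return UINT32_MAX;
  }
};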
@@ -63,8 +63,8 @@ namespace llvm {
class AsmPrinter;
class DIE;
-class DwarfDebug;
-
+class DwarfUnits;
+
class DwarfAccelTable {
enum HashFunctionType {
@@ -81,7 +81,7 @@ class DwarfAccelTable {
// Helper function to compute the number of buckets needed based on
// the number of unique hashes.
void ComputeBucketCount (void);
-
+
struct TableHeader {
uint32_t magic; // 'HASH' magic value to allow endian detection
uint16_t version; // Version number.
@@ -94,7 +94,7 @@ class DwarfAccelTable {
// Also written to disk is the implementation specific header data.
static const uint32_t MagicHash = 0x48415348;
-
+
TableHeader (uint32_t data_len) :
magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
bucket_count (0), hashes_count (0), header_data_len (data_len)
@@ -123,7 +123,7 @@ public:
//
// uint32_t die_offset_base
// uint32_t atom_count
- // atom_count Atoms
+ // atom_count Atoms
enum AtomType {
eAtomTypeNULL = 0u,
eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
@@ -138,12 +138,12 @@ public:
enum TypeFlags {
eTypeFlagClassMask = 0x0000000fu,
-
+
// Always set for C++, only set for ObjC if this is the
// @implementation for a class.
eTypeFlagClassIsImplementation = ( 1u << 1 )
- };
-
+ };
+
// Make these public so that they can be used as a general interface to
// the class.
struct Atom {
@@ -237,15 +237,15 @@ private:
#endif
};
- DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
- void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+ void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
// Internal Functions
void EmitHeader(AsmPrinter *);
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D);
+ void EmitData(AsmPrinter *, DwarfUnits *D);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -265,14 +265,14 @@ private:
typedef std::vector<HashList> BucketList;
BucketList Buckets;
HashList Hashes;
-
+
// Public Implementation
public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
~DwarfAccelTable();
void AddName(StringRef, DIE*, char = 0);
void FinalizeTable(AsmPrinter *, const char *);
- void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index d975f1f..fec5ced 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,31 +12,31 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
@@ -122,8 +122,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+  Asm->OutStreamer.EmitDebugLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                     Asm->getFunctionNumber()));
// Provide LSDA information.
if (!shouldEmitLSDA)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index e585897..21cceaf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,28 +13,29 @@
#define DEBUG_TYPE "dwarfdebug"
-#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
+#include "DwarfAccelTable.h"
#include "DwarfDebug.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/DIBuilder.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
- DwarfDebug *DW)
- : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW, DwarfUnits *DWU)
+ : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -51,6 +52,50 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
return Value;
}
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+ switch (Language) {
+ default:
+ break;
+
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 4.
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ case dwarf::DW_LANG_D:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
/// addFlag - Add a flag that is true.
void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
if (!DD->useDarwinGDBCompat())
@@ -81,14 +126,37 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
/// addString - Add a string attribute data and value. We always emit a
/// reference to the string pool instead of immediate strings so that DIEs have
-/// more predictable sizes.
+/// more predictable sizes. In the case of split dwarf we emit an index
+/// into another table, which gives us the static offset into the string
+/// table.
void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
- MCSymbol *Symb = DD->getStringPoolEntry(String);
+ if (!DD->useSplitDwarf()) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+ } else {
+ unsigned idx = DU->getStringPoolIndex(String);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value);
+ }
+}
+
+/// addLocalString - Add a string attribute data and value. This is guaranteed
+/// to be in the local string pool instead of indirected.
+void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
+ StringRef String) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
DIEValue *Value;
if (Asm->needsRelocationsForDwarfStringPool())
Value = new (DIEValueAllocator) DIELabel(Symb);
else {
- MCSymbol *StringPool = DD->getStringPool();
+ MCSymbol *StringPool = DU->getStringPoolSym();
Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
}
Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
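The two paths in addString above are worth restating: without split dwarf it emits DW_FORM_strp (a direct label, or a label difference against the pool start); with split dwarf it emits DW_FORM_GNU_str_index, an index that a consumer resolves through a separate string-offsets table. A sketch of that consumer-side resolution, simplified to 4-byte offsets with no real section parsing:

#include <cstdint>
#include <string>
#include <vector>

// Resolve a DW_FORM_GNU_str_index value: the index selects an entry in the
// string-offsets table, which in turn is an offset into the string section.
struct SplitDwarfStrings {
  std::vector<uint32_t> StrOffsets; // string-offsets table entries
  std::string StrSection;           // raw string-section bytes

  const char *resolve(uint32_t Index) const {
    return StrSection.c_str() + StrOffsets[Index];
  }
};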
@@ -132,11 +200,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
if (!V.Verify())
return;
-
+
unsigned Line = V.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
V.getContext().getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -153,7 +221,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -171,7 +239,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
SP.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -188,7 +256,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
unsigned Line = Ty.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
Ty.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -206,7 +274,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
if (Line == 0)
return;
DIFile File = Ty.getFile();
- unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -225,15 +293,15 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// addVariableAddress - Add DW_AT_location attribute for a
+/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
-void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
MachineLocation Location) {
if (DV->variableHasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
@@ -492,7 +560,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
case 64: Form = dwarf::DW_FORM_data8; break;
default: break;
}
- SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
: addUInt(Block, 0, Form, MO.getImm());
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
@@ -510,7 +578,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
const char *FltPtr = (const char*)FltVal.getRawData();
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
int Incr = (LittleEndian ? 1 : -1);
int Start = (LittleEndian ? 0 : NumBytes - 1);
int Stop = (LittleEndian ? NumBytes : -1);
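Restating just the Incr/Start/Stop index arithmetic above as a standalone helper (a sketch that mirrors only the iteration order, not the APInt plumbing):

#include <cstdint>
#include <vector>

// Walk NumBytes raw bytes forward for little-endian output and backward
// otherwise, matching the Incr/Start/Stop scheme above.
static std::vector<uint8_t> bytesInEmissionOrder(const uint8_t *Raw,
                                                 int NumBytes,
                                                 bool LittleEndian) {
  std::vector<uint8_t> Out;
  int Incr = LittleEndian ? 1 : -1;
  int Start = LittleEndian ? 0 : NumBytes - 1;
  int Stop = LittleEndian ? NumBytes : -1;
  for (int i = Start; i != Stop; i += Incr)
    Out.push_back(Raw[i]);
  return Out;
}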
@@ -535,7 +603,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
case 16: form = dwarf::DW_FORM_data2; break;
case 32: form = dwarf::DW_FORM_data4; break;
case 64: form = dwarf::DW_FORM_data8; break;
- default:
+ default:
form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
}
if (Unsigned)
@@ -552,7 +620,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
// Output the constant to DWARF one byte at a time.
for (int i = 0; i < NumBytes; i++) {
@@ -635,7 +703,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
}
-
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
@@ -670,8 +738,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
///
void CompileUnit::addGlobalType(DIType Ty) {
DIDescriptor Context = Ty.getContext();
- if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
- && (!Context || Context.isCompileUnit() || Context.isFile()
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
|| Context.isNameSpace()))
if (DIEEntry *Entry = getDIEEntry(Ty))
GlobalTypes[Ty.getName()] = Entry->getEntry();
@@ -739,6 +807,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
if (Size && Tag != dwarf::DW_TAG_pointer_type)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DTy.getClassType()));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
addSourceLine(&Buffer, DTy);
@@ -754,7 +825,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.setTag(Tag);
switch (Tag) {
- case dwarf::DW_TAG_vector_type:
case dwarf::DW_TAG_array_type:
constructArrayTypeDIE(Buffer, &CTy);
break;
@@ -830,7 +900,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (SP.isPrivate())
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
@@ -878,7 +948,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (Property.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
PropertyAttributes);
DIEEntry *Entry = getDIEEntry(Element);
@@ -951,7 +1021,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
}
-/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
DIE *
CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
@@ -965,7 +1035,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
return ParamDIE;
}
-/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateValueParameter.
DIE *
CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
@@ -977,7 +1047,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
- addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
}
@@ -1095,7 +1165,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (!SP.isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
-
+
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
DICompositeType SPTy = SP.getType();
@@ -1105,7 +1175,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i)));
+ DIType ATy = DIType(Args.getElement(i));
addType(Arg, ATy);
if (ATy.isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
@@ -1213,7 +1283,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (const ConstantInt *CI =
+ } else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
@@ -1226,8 +1296,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
- addUInt(Block, 0, dwarf::DW_FORM_udata,
- Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
@@ -1250,22 +1320,25 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
DIE *IndexTy) {
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- uint64_t L = SR.getLo();
- uint64_t H = SR.getHi();
- // The L value defines the lower bounds which is typically zero for C/C++. The
- // H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
- // of the array. If L > H then do not emit DW_AT_lower_bound and
- // DW_AT_upper_bound attributes. If L is zero and H is also zero then the
- // array has one element and in such case do not emit lower bound.
+  // The LowerBound value defines the lower bound, which is typically zero for
+ // C/C++. The Count value is the number of elements. Values are 64 bit. If
+ // Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+ // Count == 0, then the array has zero elements in which case we do not emit
+ // an upper bound.
+ int64_t LowerBound = SR.getLo();
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+ int64_t Count = SR.getCount();
+
+ if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+ if (Count != -1 && Count != 0)
+ // FIXME: An unbounded array should reference the expression that defines
+ // the array.
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
- if (L > H) {
- Buffer.addChild(DW_Subrange);
- return;
- }
- if (L)
- addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
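Given the encoding above, DW_AT_upper_bound for a bounded, non-empty array is LowerBound + Count - 1. A one-line worked form:

#include <cstdint>

// e.g. C's `int a[10]`: LowerBound 0, Count 10 -> upper bound 9; a Fortran
// array of 10 elements with default lower bound 1 -> upper bound 10.
inline int64_t upperBound(int64_t LowerBound, int64_t Count) {
  return LowerBound + Count - 1;
}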
@@ -1273,7 +1346,7 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy) {
Buffer.setTag(dwarf::DW_TAG_array_type);
- if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ if (CTy->isVector())
addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
// Emit derived type.
@@ -1281,10 +1354,13 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIArray Elements = CTy->getTypeArray();
// Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
if (!IdxTy) {
// Construct an anonymous type for index type.
IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addString(IdxTy, dwarf::DW_AT_name, "int");
addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_signed);
@@ -1327,8 +1403,6 @@ void CompileUnit::constructContainingTypeDIEs() {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
StringRef Name = DV->getName();
- if (Name.empty())
- return NULL;
// Translate tag to proper Dwarf tag.
unsigned Tag = DV->getTag();
@@ -1376,20 +1450,20 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF,
- DVInsn->getOperand(1).getImm(),
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
-
+
} else if (RegOp.getReg())
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
MachineLocation(RegOp.getReg()));
updated = true;
}
else if (DVInsn->getOperand(0).isImm())
- updated =
+ updated =
addConstantValue(VariableDie, DVInsn->getOperand(0),
DV->getType());
else if (DVInsn->getOperand(0).isFPImm())
@@ -1397,11 +1471,11 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addConstantFPValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
updated =
- addConstantValue(VariableDie,
+ addConstantValue(VariableDie,
DVInsn->getOperand(0).getCImm(),
DV->getType().isUnsignedDIType());
} else {
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
Asm->getDebugValueLocation(DVInsn));
updated = true;
}
@@ -1419,7 +1493,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
if (FI != ~0) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
+ int Offset =
TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
@@ -1459,7 +1533,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
Offset -= FieldOffset;
// Maybe we need to work from the other end.
- if (Asm->getTargetData().isLittleEndian())
+ if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
@@ -1499,7 +1573,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else
+ else
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1509,9 +1583,12 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
// Objective-C properties.
if (MDNode *PNode = DT.getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
- MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
PropertyDie);
+ if (DT.isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+
// This is only for backward compatibility.
StringRef PropertyName = DT.getObjCPropertyName();
if (!PropertyName.empty()) {
@@ -1536,7 +1613,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
if (DT.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
PropertyAttributes);
}
return MemberDie;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 22401fe..f210dcc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,14 +15,15 @@
#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DIE.h"
-#include "llvm/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
namespace llvm {
class DwarfDebug;
+class DwarfUnits;
class MachineLocation;
class MachineOperand;
class ConstantInt;
@@ -32,9 +33,9 @@ class DbgVariable;
/// CompileUnit - This dwarf writer support class manages information associated
/// with a source file.
class CompileUnit {
- /// ID - File identifier for source.
+  /// UniqueID - A numeric ID unique among all CUs in the module.
///
- unsigned ID;
+ unsigned UniqueID;
/// Language - The DW_AT_language of the compile unit
///
@@ -47,7 +48,9 @@ class CompileUnit {
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
+ // Holders for some common dwarf information.
DwarfDebug *DD;
+ DwarfUnits *DU;
/// IndexTyDie - An anonymous type for index type. Owned by CUDie.
DIE *IndexTyDie;
@@ -79,12 +82,17 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+  /// getDefaultLowerBound - Return the default lower bound for an array. If
+  /// the DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
public:
- CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
+ DwarfUnits *);
~CompileUnit();
// Accessors.
- unsigned getID() const { return ID; }
+ unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -102,7 +110,7 @@ public:
&getAccelTypes() const {
return AccelTypes;
}
-
+
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
@@ -129,12 +137,12 @@ public:
std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
DIEs.push_back(Die);
}
-
+
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
- DIEBlock *getDIEBlock() {
+ DIEBlock *getDIEBlock() {
return new (DIEValueAllocator) DIEBlock();
}
@@ -174,11 +182,10 @@ public:
void setIndexTyDie(DIE *D) {
IndexTyDie = D;
}
-public:
/// addFlag - Add a flag that is true to the DIE.
void addFlag(DIE *Die, unsigned Attribute);
-
+
/// addUInt - Add an unsigned integer attribute data and value.
///
void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
@@ -191,6 +198,10 @@ public:
///
void addString(DIE *Die, unsigned Attribute, const StringRef Str);
+ /// addLocalString - Add a string attribute data and value.
+ ///
+ void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str);
+
/// addLabel - Add a Dwarf label attribute data and value.
///
void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
@@ -204,7 +215,7 @@ public:
/// addDIEEntry - Add a DIE attribute data and value.
///
void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
-
+
/// addBlock - Add block data.
///
void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
@@ -257,7 +268,7 @@ public:
void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
- /// addVariableAddress - Add DW_AT_location attribute for a
+ /// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
@@ -279,12 +290,12 @@ public:
/// given DIType.
DIE *getOrCreateTypeDIE(const MDNode *N);
- /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
- /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
- /// for the given DITemplateValueParameter.
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+ /// new DIE for the given DITemplateValueParameter.
DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
@@ -312,7 +323,7 @@ public:
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(DIE &Buffer,
+ void constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy);
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 946ac35..66a5a6d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,34 +16,34 @@
#include "DIE.h"
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
@@ -78,6 +78,15 @@ static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
clEnumValEnd),
cl::init(Default));
+static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
+ cl::desc("Output prototype dwarf split debug info."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -85,8 +94,8 @@ namespace {
//===----------------------------------------------------------------------===//
-/// Configuration values for initial hash set sizes (log2).
-///
+// Configuration values for initial hash set sizes (log2).
+//
static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
namespace llvm {
@@ -102,19 +111,19 @@ DIType DbgVariable::getType() const {
either the struct, or a pointer to the struct, as its type. This
is necessary for various behind-the-scenes things the compiler
needs to do with by-reference variables in blocks.
-
+
However, as far as the original *programmer* is concerned, the
variable should still have type 'SomeType', as originally declared.
-
+
The following function dives into the __Block_byref_x_VarName
struct to find the original type of the variable. This will be
passed back to the code generating the type for the Debug
Information Entry for the variable 'VarName'. 'VarName' will then
have the original type 'SomeType' in its debug information.
-
+
The original type 'SomeType' will be the type of the field named
'VarName' inside the __Block_byref_x_VarName struct.
-
+
NOTE: In order for this to not completely fail on the debugger
side, the Debug Information Entry for the variable VarName needs to
have a DW_AT_location that tells the debugger how to unwind through
@@ -122,15 +131,15 @@ DIType DbgVariable::getType() const {
value of the variable. The function addBlockByrefType does this. */
DIType subType = Ty;
unsigned tag = Ty.getTag();
-
+
if (tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
subType = DTy.getTypeDerivedFrom();
}
-
+
DICompositeType blockStruct = DICompositeType(subType);
DIArray Elements = blockStruct.getTypeArray();
-
+
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIDerivedType DT = DIDerivedType(Element);
@@ -146,45 +155,56 @@ DIType DbgVariable::getType() const {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
- SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator),
- PrevLabel(NULL) {
- NextStringPoolNumber = 0;
+ SourceIdMap(DIEValueAllocator),
+ PrevLabel(NULL), GlobalCUIndexCount(0),
+ InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
+ DIEValueAllocator),
+ SkeletonCU(0),
+ SkeletonAbbrevSet(InitAbbreviationsSetSize),
+ SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
+ DIEValueAllocator) {
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
// Turn on accelerator tables and older gdb compatibility
// for Darwin.
- bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
if (DarwinGDBCompat == Default) {
- if (isDarwin)
- isDarwinGDBCompat = true;
+ if (IsDarwin)
+ IsDarwinGDBCompat = true;
else
- isDarwinGDBCompat = false;
+ IsDarwinGDBCompat = false;
} else
- isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+ IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
if (DwarfAccelTables == Default) {
- if (isDarwin)
- hasDwarfAccelTables = true;
+ if (IsDarwin)
+ HasDwarfAccelTables = true;
else
- hasDwarfAccelTables = false;
+ HasDwarfAccelTables = false;
} else
- hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+ HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
+ if (SplitDwarf == Default)
+ HasSplitDwarf = false;
+ else
+ HasSplitDwarf = SplitDwarf == Enable ? true : false;
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- beginModule(M);
+ beginModule();
}
}
DwarfDebug::~DwarfDebug() {
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
const char *SymbolStem = 0) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem) return 0;
@@ -194,44 +214,55 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
return TmpSym;
}
-MCSymbol *DwarfDebug::getStringPool() {
- return Asm->GetTempSymbol("section_str");
+MCSymbol *DwarfUnits::getStringPoolSym() {
+ return Asm->GetTempSymbol(StringPref);
}
-MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
- std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
if (Entry.first) return Entry.first;
Entry.second = NextStringPoolNumber++;
- return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+ return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
}
-/// assignAbbrevNumber - Define a unique number for the abbreviation.
-///
-void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextStringPoolNumber++;
+ Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+ return Entry.second;
+}
+
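getStringPoolEntry and getStringPoolIndex above share one get-or-create map: the first hit allocates both a label and an index, and each accessor returns its half of the pair. A standalone sketch of the pattern (std::map and a plain string standing in for StringMap and MCSymbol):

#include <map>
#include <string>
#include <utility>

struct StringPoolSketch {
  std::map<std::string, std::pair<std::string, unsigned>> Pool;
  unsigned NextIndex = 0;

  // Return the pool index for Str, creating the label/index pair on first use.
  unsigned getIndex(const std::string &Str) {
    std::pair<std::string, unsigned> &Entry = Pool[Str];
    if (!Entry.first.empty())
      return Entry.second;
    Entry.second = NextIndex++;
    Entry.first = "str" + std::to_string(Entry.second); // temp-label name
    return Entry.second;
  }
};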
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
// Profile the node so that we can make it unique.
FoldingSetNodeID ID;
Abbrev.Profile(ID);
// Check the set for priors.
- DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+ DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
// If it's newly added.
if (InSet == &Abbrev) {
// Add to abbreviation list.
- Abbreviations.push_back(&Abbrev);
+ Abbreviations->push_back(&Abbrev);
// Assign the vector position + 1 as its number.
- Abbrev.setNumber(Abbreviations.size());
+ Abbrev.setNumber(Abbreviations->size());
} else {
// Assign existing abbreviation number.
Abbrev.setNumber(InSet->getNumber());
}
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
-/// printer to not emit usual symbol prefix before the symbol name is used then
-/// return linkage name after skipping this special LLVM prefix.
+// If the special LLVM prefix that tells the asm printer not to emit the
+// usual symbol prefix before the symbol name is present, return the linkage
+// name after skipping this special LLVM prefix.
static StringRef getRealLinkageName(StringRef LinkageName) {
char One = '\1';
if (LinkageName.startswith(StringRef(&One, 1)))
@@ -275,7 +306,7 @@ static StringRef getObjCMethodName(StringRef In) {
static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
DIE* Die) {
if (!SP.isDefinition()) return;
-
+
TheCU->addAccelName(SP.getName(), Die);
// If the linkage name is different than the name, go ahead and output
@@ -296,10 +327,9 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
}
}
-/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
-/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
-/// If there are global variables in this scope then create and insert
-/// DIEs for these variables.
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
const MDNode *SPNode) {
DIE *SPDie = SPCU->getDIE(SPNode);
@@ -307,44 +337,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (!SPDecl.isSubprogram()) {
- // There is not any need to generate specification DIE for a function
- // defined at compile unit level. If a function is defined inside another
- // function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating
- // specification DIE for a function defined inside a function.
- if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
- !isSubprogramContext(SP.getContext())) {
- SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
-
- // Add arguments.
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
- unsigned SPTag = SPTy.getTag();
- if (SPTag == dwarf::DW_TAG_subroutine_type)
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i)));
- SPCU->addType(Arg, ATy);
- if (ATy.isArtificial())
- SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
- SPDie->addChild(Arg);
- }
- DIE *SPDeclDie = SPDie;
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPDeclDie);
- SPCU->addDie(SPDie);
- }
- }
- // Pick up abstract subprogram DIE.
+ // If we're updating an abstract DIE, then we will be adding the children and
+ // object pointer later on. But what we don't want to do is process the
+ // concrete DIE twice.
if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ // Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsSPDIE);
SPCU->addDie(SPDie);
+ } else {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (!SPDecl.isSubprogram()) {
+      // There is no need to generate a specification DIE for a function
+      // defined at compile unit level. If a function is defined inside another
+      // function then gdb prefers the definition at top level but does not
+      // expect a specification DIE in the parent function. So avoid creating
+      // a specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
+ if (ATy.isObjectPointer())
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, Arg);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
}
SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
@@ -358,13 +395,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
addSubprogramNames(SPCU, SP, SPDie);
-
+
return SPDie;
}
-/// constructLexicalScope - Construct new DW_TAG_lexical_block
-/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
@@ -380,8 +417,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
- * Asm->getTargetData().getPointerSize());
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -406,9 +443,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
return ScopeDIE;
}
-/// constructInlinedScopeDIE - This scope represents inlined body of
-/// a function. Construct DIE to represent this concrete inlined copy
-/// of the function.
+// This scope represents the inlined body of a function. Construct a DIE to
+// represent this concrete inlined copy of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
@@ -421,7 +457,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DISubprogram InlinedSP = getDISubprogram(DS);
DIE *OriginDIE = TheCU->getDIE(InlinedSP);
if (!OriginDIE) {
- DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
return NULL;
}
@@ -430,7 +466,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- llvm_unreachable("Unexpected Start and End labels for a inlined scope!");
+ llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
}
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -446,8 +482,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
- * Asm->getTargetData().getPointerSize());
+ DebugRangeSymbols.size()
+ * Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -456,9 +492,9 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DebugRangeSymbols.push_back(NULL);
DebugRangeSymbols.push_back(NULL);
} else {
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
StartLabel);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
EndLabel);
}
@@ -480,37 +516,42 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DILocation DL(Scope->getInlinedAt());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
- GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
  // Add the name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
-
+
return ScopeDIE;
}
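Both constructLexicalScopeDIE and constructInlinedScopeDIE make the same encoding choice shown above. Here is a hedged sketch of just that decision, with a toy Range type standing in for InsnRange and the offset arithmetic matching the pointer-sized .debug_range entries in the code:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

using Range = std::pair<uint64_t, uint64_t>; // [start, end) code addresses

struct ScopeEncoding {
  bool UseRanges;        // true: DW_AT_ranges; false: low_pc/high_pc pair
  uint32_t RangesOffset; // byte offset into .debug_ranges when UseRanges
  Range Single;          // the single range otherwise
};

// NextFreeSlot mirrors DebugRangeSymbols.size(); entries are pointer-sized.
ScopeEncoding pickScopeEncoding(const std::vector<Range> &Ranges,
                                uint32_t NextFreeSlot, uint32_t PointerSize) {
  if (Ranges.size() > 1)
    // The range list itself ends with a (0, 0) pair, which the code above
    // models by pushing two NULL symbols.
    return {true, NextFreeSlot * PointerSize, {0, 0}};
  // Callers guarantee at least one range (empty scopes return NULL above).
  return {false, 0, Ranges.front()};
}
```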
-/// constructScopeDIE - Construct a DIE for this scope.
+// Construct a DIE for this scope.
DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = NULL;
  // Collect arguments for the current function.
if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
- if (DIE *Arg =
- TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope()))
+ if (DIE *Arg =
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
Children.push_back(Arg);
+ if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+ }
// Collect lexical scope children first.
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
- if (DIE *Variable =
- TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope()))
+ if (DIE *Variable =
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
Children.push_back(Variable);
+ if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+ }
const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
@@ -536,7 +577,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
}
-
+
if (!ScopeDIE) return NULL;
// Add children
@@ -544,21 +585,25 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
E = Children.end(); I != E; ++I)
ScopeDIE->addChild(*I);
+ if (DS.isSubprogram() && ObjectPointer != NULL)
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, ObjectPointer);
+
if (DS.isSubprogram())
TheCU->addPubTypes(DISubprogram(DS));
return ScopeDIE;
}
-/// GetOrCreateSourceID - Look up the source id with the given directory and
-/// source file names. If none currently exists, create a new id and insert it
-/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
-/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
+// Look up the source id with the given directory and source file names.
+// If none currently exists, create a new id and insert it in the
+// SourceIds map. This can update DirectoryNames and SourceFileNames maps
+// as well.
+unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
- return GetOrCreateSourceID("<stdin>", StringRef());
+ return getOrCreateSourceID("<stdin>", StringRef());
// TODO: this might not belong here. See if we can factor this better.
if (DirName == CompilationDir)
@@ -582,16 +627,20 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
return SrcId;
}
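A minimal, self-contained model of this caching scheme (hypothetical names; LLVM's actual table is a StringMap keyed on the concatenated directory and file names):

```cpp
#include <map>
#include <string>
#include <utility>

class SourceIdTable {
  std::map<std::pair<std::string, std::string>, unsigned> Ids;

public:
  unsigned getOrCreate(const std::string &Dir, const std::string &File) {
    if (File.empty())
      return getOrCreate("", "<stdin>"); // FE gave no name: assume stdin
    auto Key = std::make_pair(Dir, File);
    auto It = Ids.find(Key);
    if (It != Ids.end())
      return It->second; // seen before: reuse its id
    unsigned Id = static_cast<unsigned>(Ids.size()) + 1; // ids are 1-based
    Ids.emplace(std::move(Key), Id);
    // The real function also emits a .file directive for new entries.
    return Id;
  }
};
```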
-/// constructCompileUnit - Create new CompileUnit for the given
-/// metadata node with tag DW_TAG_compile_unit.
+// Create new CompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
+  // Call this to emit a .file directive if one hasn't already been emitted
+  // for the source file this CU comes from.
+ getOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &InfoHolder);
NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
@@ -615,19 +664,24 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
-
+
if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
if (!FirstCU)
FirstCU = NewCU;
+ if (useSplitDwarf() && !SkeletonCU)
+ SkeletonCU = constructSkeletonCU(N);
+
+ InfoHolder.addUnit(NewCU);
+
CUMap.insert(std::make_pair(N, NewCU));
return NewCU;
}
-/// construct SubprogramDIE - Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+// Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
CompileUnit *&CURef = SPMap[N];
if (CURef)
@@ -651,30 +705,29 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
return;
}
-/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
-/// as llvm.dbg.enum and llvm.dbg.ty
-void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
+// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
+void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
constructSubprogramDIE(CU, N);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
CU->createGlobalVariableDIE(N);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIType Ty(NMD->getOperand(i));
if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
CU->getOrCreateTypeDIE(Ty);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIType Ty(NMD->getOperand(i));
@@ -683,12 +736,12 @@ void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
}
}
-/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
-/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
+// Collect debug info using DebugInfoFinder.
+// FIXME - Remove this when dragonegg switches to DIBuilder.
+bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
-
+
bool HasDebugInfo = false;
// Scan all the compile-units to see if there are any marked as the main
// unit. If not, we do not generate debug info.
@@ -700,12 +753,12 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
}
}
if (!HasDebugInfo) return false;
-
+
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
constructCompileUnit(*I);
-
+
// Create DIEs for each global variable.
for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
E = DbgFinder.global_variable_end(); I != E; ++I) {
@@ -713,7 +766,7 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
CU->createGlobalVariableDIE(N);
}
-
+
// Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I) {
@@ -725,13 +778,15 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
return HasDebugInfo;
}
-/// beginModule - Emit all Dwarf sections that should come prior to the
-/// content. Create global DIEs and emit initial debug info sections.
-/// This is invoked by the target AsmPrinter.
-void DwarfDebug::beginModule(Module *M) {
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule() {
if (DisableDebugInfoPrinting)
return;
+ const Module *M = MMI->getModule();
+
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -756,25 +811,36 @@ void DwarfDebug::beginModule(Module *M) {
return;
collectInfoFromNamedMDNodes(M);
-
+
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
- // Emit initial sections.
- EmitSectionLabels();
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
}
-/// endModule - Emit all Dwarf sections that should come after the content.
-///
-void DwarfDebug::endModule() {
- if (!FirstCU) return;
+// Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+void DwarfDebug::computeInlinedDIEs() {
+  // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+}
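The subtlety in computeInlinedDIEs is that the same DIE can appear in both containers, so the second loop must skip anything the first loop already tagged. A toy restatement of that dedup logic:

```cpp
#include <set>
#include <vector>

struct ToyDie { bool HasInlineAttr = false; };

void markInlined(const std::set<ToyDie *> &InlinedSPs,
                 const std::vector<ToyDie *> &AbstractSPs) {
  for (ToyDie *D : InlinedSPs)
    D->HasInlineAttr = true; // DW_AT_inline = DW_INL_inlined in the tree
  for (ToyDie *D : AbstractSPs) {
    if (InlinedSPs.count(D))
      continue; // already tagged by the first loop
    D->HasInlineAttr = true;
  }
}
```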
+
+// Collect info for variables that were optimized out.
+void DwarfDebug::collectDeadVariables() {
const Module *M = MMI->getModule();
DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
- // Collect info for variables that were optimized out.
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit TheCU(CU_Nodes->getOperand(i));
@@ -787,10 +853,10 @@ void DwarfDebug::endModule() {
DIArray Variables = SP.getVariables();
if (Variables.getNumElements() == 0) continue;
- LexicalScope *Scope =
+ LexicalScope *Scope =
new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
DeadFnScopeMap[SP] = Scope;
-
+
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
assert(SPCU && "Unable to find Compile Unit!");
@@ -800,27 +866,22 @@ void DwarfDebug::endModule() {
DIVariable DV(Variables.getElement(vi));
if (!DV.Verify()) continue;
DbgVariable *NewVar = new DbgVariable(DV, NULL);
- if (DIE *VariableDIE =
+ if (DIE *VariableDIE =
SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
ScopeDIE->addChild(VariableDIE);
}
}
}
}
+ DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ // Collect info for variables that were optimized out.
+ collectDeadVariables();
  // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
- for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
- AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
- DIE *ISP = *AI;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
- for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
- AE = AbstractSPDies.end(); AI != AE; ++AI) {
- DIE *ISP = AI->second;
- if (InlinedSubprogramDIEs.count(ISP))
- continue;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
+ computeInlinedDIEs();
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
@@ -830,6 +891,13 @@ void DwarfDebug::endModule() {
TheCU->constructContainingTypeDIEs();
}
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
@@ -837,19 +905,83 @@ void DwarfDebug::endModule() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
// End text sections.
- for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
- Asm->OutStreamer.SwitchSection(SectionMap[i]);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+ for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+ Asm->OutStreamer.SwitchSection(SectionMap[I]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
}
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+
+ if (!FirstCU) return;
+
+ // End any existing sections.
+ // TODO: Does this need to happen?
+ endSections();
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ // Emit initial sections.
+ emitSectionLabels();
+
+ if (!useSplitDwarf()) {
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+    // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
- // Compute DIE offsets and sizes.
- computeSizeAndOffsets();
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
- // Emit all the DIEs into a debug info section
- emitDebugInfo();
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
-  // Corresponding abbreviations into an abbrev section.
- emitAbbreviations();
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ } else {
+ // TODO: Fill this in for separated debug sections and separate
+ // out information into new sections.
+
+ // Emit the debug info section and compile units.
+ emitDebugInfo();
+ emitDebugInfoDWO();
+
+    // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+ emitDebugAbbrevDWO();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ }
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
@@ -858,45 +990,32 @@ void DwarfDebug::endModule() {
emitAccelNamespaces();
emitAccelTypes();
}
-
+
// Emit info into a debug pubtypes section.
// TODO: When we don't need the option anymore we can
// remove all of the code that adds to the table.
if (useDarwinGDBCompat())
emitDebugPubTypes();
- // Emit info into a debug loc section.
- emitDebugLoc();
-
- // Emit info into a debug aranges section.
- EmitDebugARanges();
-
- // Emit info into a debug ranges section.
- emitDebugRanges();
-
- // Emit info into a debug macinfo section.
- emitDebugMacInfo();
-
- // Emit inline info.
- // TODO: When we don't need the option anymore we
- // can remove all of the code that this section
- // depends upon.
- if (useDarwinGDBCompat())
- emitDebugInlineInfo();
-
- // Emit info into a debug str section.
+ // Finally emit string information into a string table.
emitDebugStr();
+ if (useSplitDwarf())
+ emitDebugStrDWO();
// clean up.
- DeleteContainerSeconds(DeadFnScopeMap);
SPMap.clear();
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
delete I->second;
- FirstCU = NULL; // Reset for the next Module, if any.
+
+ delete SkeletonCU;
+
+ // Reset these for the next Module if we have one.
+ FirstCU = NULL;
+ SkeletonCU = NULL;
}
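Condensed, the reorganized endModule is a fixed phase pipeline. The outline below uses the function names from this patch with all bodies elided; it is a reading aid, not code from the tree:

```cpp
void endModuleOutline() {
  // 1. endSections()        : end labels for text/data/other code sections
  // 2. finalizeModuleInfo() : dead variables, DW_AT_inline, DIE offsets
  // 3. emitSectionLabels()  : begin labels for each DWARF section
  // 4. emitDebugInfo()      : plus emitDebugInfoDWO() under split DWARF
  // 5. emitAbbreviations()  : plus emitDebugAbbrevDWO() under split DWARF
  // 6. loc, aranges, ranges, macinfo; inline info for Darwin GDB compat
  // 7. accelerator tables and pubtypes, when those options are enabled
  // 8. emitDebugStr()       : plus emitDebugStrDWO() under split DWARF
}
```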
-/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+// Find abstract variable, if any, associated with Var.
DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
DebugLoc ScopeLoc) {
LLVMContext &Ctx = DV->getContext();
@@ -916,8 +1035,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
return AbsDbgVariable;
}
-/// addCurrentFnArgument - If Var is a current function argument then add
-/// it to CurrentFnArguments list.
+// If Var is a current function argument, add it to the CurrentFnArguments list.
bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
DbgVariable *Var, LexicalScope *Scope) {
if (!LScopes.isCurrentFunctionScope(Scope))
@@ -926,7 +1044,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
if (DV.getTag() != dwarf::DW_TAG_arg_variable)
return false;
unsigned ArgNo = DV.getArgNumber();
- if (ArgNo == 0)
+ if (ArgNo == 0)
return false;
size_t Size = CurrentFnArguments.size();
@@ -940,8 +1058,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
return true;
}
-/// collectVariableInfoFromMMITable - Collect variable information from
-/// side table maintained by MMI.
+// Collect variable information from the side table maintained by MMI.
void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
@@ -970,8 +1087,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
}
}
-/// isDbgValueInDefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in a defined reg.
+// Return true if the debug value, encoded by a DBG_VALUE instruction, is in a
+// defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
return MI->getNumOperands() == 3 &&
@@ -979,10 +1096,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
}
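For reference, the operand shape this predicate tests for, restated over stand-in types (the tree of course uses a real MachineInstr):

```cpp
#include <cstdint>

enum class OpKind { Reg, Imm, Metadata };
struct ToyOperand { OpKind Kind; uint64_t Value; };

// A register-valued DBG_VALUE carries exactly three operands: a nonzero
// register, an immediate 0 offset, and the variable metadata last.
bool toyIsDbgValueInDefinedReg(const ToyOperand *Ops, unsigned NumOps) {
  return NumOps == 3 &&
         Ops[0].Kind == OpKind::Reg && Ops[0].Value != 0 &&
         Ops[1].Kind == OpKind::Imm && Ops[1].Value == 0;
}
```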
-/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
-/// at MI.
-static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
- const MCSymbol *FLabel,
+// Get .debug_loc entry for the instruction range starting at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
const MCSymbol *SLabel,
const MachineInstr *MI) {
const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
@@ -1006,12 +1122,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
-/// collectVariableInfo - Find variables for each lexical scope.
+// Find variables for each lexical scope.
void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
- /// collection info from MMI table.
+  // Collect info from the MMI table.
collectVariableInfoFromMMITable(MF, Processed);
for (SmallVectorImpl<const MDNode*>::const_iterator
@@ -1056,7 +1172,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (AbsVar)
AbsVar->setMInsn(MInsn);
- // Simple ranges that are fully coalesced.
+ // Simplify ranges that are fully coalesced.
if (History.size() <= 1 || (History.size() == 2 &&
MInsn->isIdenticalTo(History.back()))) {
RegVar->setMInsn(MInsn);
@@ -1086,7 +1202,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SLabel = FunctionEndSym;
else {
const MachineInstr *End = HI[1];
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
<< "\t" << *Begin << "\t" << *End << "\n");
if (End->isDebugValue())
SLabel = getLabelBeforeInsn(End);
@@ -1117,19 +1233,19 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
}
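A reduced model of the range construction in collectVariableInfo may help: each DBG_VALUE in a variable's history opens a location range at the label before it, and the range is closed by the next history entry's label or by the function end. (The real code also distinguishes DBG_VALUE successors from register-clobber entries; that detail is dropped here.)

```cpp
#include <string>
#include <vector>

struct HistEntry { bool IsDebugValue; std::string Label; };
struct LocRange { std::string Begin, End; };

std::vector<LocRange> buildLocRanges(const std::vector<HistEntry> &History,
                                     const std::string &FunctionEnd) {
  std::vector<LocRange> Out;
  for (size_t I = 0; I < History.size(); ++I) {
    if (!History[I].IsDebugValue)
      continue; // clobbers only terminate ranges; they never open one
    const std::string &End =
        (I + 1 < History.size()) ? History[I + 1].Label : FunctionEnd;
    Out.push_back({History[I].Label, End});
  }
  return Out;
}
```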
-/// getLabelBeforeInsn - Return Label preceding the instruction.
+// Return Label preceding the instruction.
const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
assert(Label && "Didn't insert label before instruction");
return Label;
}
-/// getLabelAfterInsn - Return Label immediately following the instruction.
+// Return Label immediately following the instruction.
const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-/// beginInstruction - Process beginning of an instruction.
+// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
@@ -1171,7 +1287,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
I->second = PrevLabel;
}
-/// endInstruction - Process end of an instruction.
+// Process end of an instruction.
void DwarfDebug::endInstruction(const MachineInstr *MI) {
// Don't create a new label after DBG_VALUE instructions.
// They don't generate code.
@@ -1197,11 +1313,10 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
I->second = PrevLabel;
}
-/// identifyScopeMarkers() -
-/// Each LexicalScope has first instruction and last instruction to mark
-/// beginning and end of a scope respectively. Create an inverse map that list
-/// scopes starts (and ends) with an instruction. One instruction may start (or
-/// end) multiple scopes. Ignore scopes that are not reachable.
+// Each LexicalScope has a first and a last instruction marking the beginning
+// and the end of the scope, respectively. Create an inverse map that lists
+// which scopes start (and end) at an instruction. One instruction may start
+// (or end) multiple scopes. Ignore scopes that are not reachable.
void DwarfDebug::identifyScopeMarkers() {
SmallVector<LexicalScope *, 4> WorkList;
WorkList.push_back(LScopes.getCurrentFunctionScope());
@@ -1230,15 +1345,15 @@ void DwarfDebug::identifyScopeMarkers() {
}
}
-/// getScopeNode - Get MDNode for DebugLoc's scope.
+// Get MDNode for DebugLoc's scope.
static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
return DL.getScope(Ctx);
}
-/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+// Walk up the scope chain of the given debug loc and find the line number
+// info for the function.
static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
const MDNode *Scope = getScopeNode(DL, Ctx);
DISubprogram SP = getDISubprogram(Scope);
@@ -1254,8 +1369,8 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
return DebugLoc();
}
-/// beginFunction - Gather pre-function debug information. Assumes being
-/// emitted immediately after the function entry point.
+// Gather pre-function debug information. Assumes being called immediately
+// after the function entry point has been emitted.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
LScopes.initialize(*MF);
@@ -1270,7 +1385,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
- /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+ // LiveUserVar - Map physreg numbers to the MDNode they contain.
std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
@@ -1309,8 +1424,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Coalesce identical entries at the end of History.
if (History.size() >= 2 &&
Prev->isIdenticalTo(History[History.size() - 2])) {
- DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
- << "\t" << *Prev
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
<< "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
}
@@ -1325,7 +1440,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end()) {
// Drop DBG_VALUE for empty range.
- DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
<< "\t" << *Prev << "\n");
History.pop_back();
}
@@ -1342,9 +1457,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MI->isLabel())
AtBlockEntry = false;
- // First known non DBG_VALUE location marks beginning of function
- // body.
- if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ if (!MI->getFlag(MachineInstr::FrameSetup) &&
+ (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
PrologEndLoc = MI->getDebugLoc();
// Check if the instruction clobbers any registers with debug vars.
@@ -1395,7 +1511,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *Prev = History.back();
if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
const MachineBasicBlock *PrevMBB = Prev->getParent();
- MachineBasicBlock::const_iterator LastMI =
+ MachineBasicBlock::const_iterator LastMI =
PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end())
// Drop DBG_VALUE for empty range.
@@ -1424,7 +1540,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
+                   // We'd like to list the prologue as "not statements" but GDB
+                   // behaves poorly if we do that; revisit after GDB (7.5+) testing.
+ DWARF2_FLAG_IS_STMT);
}
}
@@ -1434,8 +1552,7 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// Vars.push_back(Var);
}
-/// endFunction - Gather and emit post-function debug information.
-///
+// Gather and emit post-function debug information.
void DwarfDebug::endFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo() || LScopes.empty()) return;
@@ -1444,10 +1561,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
-
+
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
@@ -1477,9 +1594,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
constructScopeDIE(TheCU, AScope);
}
-
+
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
-
+
if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -1500,9 +1617,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
PrevLabel = NULL;
}
-/// recordSourceLine - Register a source line with debug info. Returns the
-/// unique label that was emitted and which provides correspondence to
-/// the source line list.
+// Register a source line with debug info. Returns the unique label that was
+// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
StringRef Fn;
@@ -1534,7 +1650,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
} else
llvm_unreachable("Unexpected scope info");
- Src = GetOrCreateSourceID(Fn, Dir);
+ Src = getOrCreateSourceID(Fn, Dir);
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
@@ -1543,10 +1659,9 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-/// computeSizeAndOffset - Compute the size and offset of a DIE.
-///
+// Compute the size and offset of a DIE.
unsigned
-DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
@@ -1555,7 +1670,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+ const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
// Set DIE offset
Die->setOffset(Offset);
@@ -1577,7 +1692,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
"Children flag not set");
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+ Offset = computeSizeAndOffset(Children[j], Offset);
// End of children marker.
Offset += sizeof(int8_t);
@@ -1587,57 +1702,61 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
return Offset;
}
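The recursion above is self-similar enough to restate over a toy node type. Sizes here are stand-ins (LLVM derives them from the abbreviation and value forms); the point is the pre-order offset assignment and the extra byte for the end-of-children marker:

```cpp
#include <vector>

struct Node {
  unsigned AbbrevCodeSize; // bytes of the ULEB128 abbreviation code
  unsigned ValuesSize;     // bytes of all attribute values
  std::vector<Node> Children;
  unsigned Offset = 0;     // filled in by the walk
  unsigned Size = 0;
};

unsigned computeSizeAndOffset(Node &N, unsigned Offset) {
  N.Offset = Offset;
  Offset += N.AbbrevCodeSize + N.ValuesSize;
  for (Node &C : N.Children)
    Offset = computeSizeAndOffset(C, Offset);
  if (!N.Children.empty())
    Offset += 1; // end-of-children marker: a single zero byte
  N.Size = Offset - N.Offset;
  return Offset;
}
```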
-/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
-///
-void DwarfDebug::computeSizeAndOffsets() {
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- // Compute size of compile unit header.
- unsigned Offset =
+// Compute the size and offset of all the DIEs.
+void DwarfUnits::computeSizeAndOffsets() {
+ for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+
+ computeSizeAndOffset((*I)->getCUDie(), Offset);
}
}
-/// EmitSectionLabels - Emit initial Dwarf sections with a label at
-/// the start of each one.
-void DwarfDebug::EmitSectionLabels() {
+// Emit initial Dwarf sections with a label at the start of each one.
+void DwarfDebug::emitSectionLabels() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
// Dwarf sections base addresses.
DwarfInfoSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
DwarfAbbrevSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ if (useSplitDwarf())
+ DwarfAbbrevDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
+ "section_abbrev_dwo");
+ emitSectionSym(Asm, TLOF.getDwarfARangesSection());
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
- EmitSectionSym(Asm, MacroInfo);
+ emitSectionSym(Asm, MacroInfo);
- EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
- DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+ if (useSplitDwarf())
+ DwarfStrDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
- DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
"section_debug_loc");
- TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
- EmitSectionSym(Asm, TLOF.getDataSection());
+ TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ emitSectionSym(Asm, TLOF.getDataSection());
}
-/// emitDIE - Recursively emits a debug information entry.
-///
-void DwarfDebug::emitDIE(DIE *Die) {
+// Recursively emits a debug information entry.
+void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+ const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1);
// Emit the code (index) for the abbreviation.
if (Asm->isVerbose())
@@ -1714,7 +1833,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
const std::vector<DIE *> &Children = Die->getChildren();
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- emitDIE(Children[j]);
+ emitDIE(Children[j], Abbrevs);
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("End Of Children Mark");
@@ -1722,20 +1841,22 @@ void DwarfDebug::emitDIE(DIE *Die) {
}
}
-/// emitDebugInfo - Emit the debug info section.
-///
-void DwarfDebug::emitDebugInfo() {
- // Start debug info section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfInfoSection());
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- CompileUnit *TheCU = I->second;
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfUnits::emitUnits(DwarfDebug *DD,
+ const MCSection *USection,
+ const MCSection *ASection,
+ const MCSymbol *ASectionSym) {
+ Asm->OutStreamer.SwitchSection(USection);
+ for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
DIE *Die = TheCU->getCUDie();
    // Emit the compile unit's header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
- TheCU->getID()));
+ Asm->OutStreamer
+ .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
+ TheCU->getUniqueID()));
// Emit size of content not including length itself
unsigned ContentSize = Die->getSize() +
@@ -1748,31 +1869,49 @@ void DwarfDebug::emitDebugInfo() {
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
- DwarfAbbrevSectionSym);
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
+ ASectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
- emitDIE(Die);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ DD->emitDIE(Die, Abbreviations);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
+ TheCU->getUniqueID()));
}
}
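A worked example of the unit header arithmetic in emitUnits: the 4-byte length field covers everything after itself, which is why ContentSize adds the version, abbrev-offset, and address-size fields to the size of the DIE tree.

```cpp
#include <cstdint>
#include <cstdio>

uint32_t cuContentSize(uint32_t DieTreeSize) {
  return DieTreeSize + sizeof(int16_t)  // DWARF version number
                     + sizeof(int32_t)  // offset into the abbrev section
                     + sizeof(int8_t);  // pointer size (in bytes)
}

int main() {
  // A unit whose DIE tree occupies 100 bytes is announced as 107 bytes
  // long; with the 4-byte length field itself, the unit spans 111 bytes.
  std::printf("%u\n", static_cast<unsigned>(cuContentSize(100))); // 107
}
```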
-/// emitAbbreviations - Emit the abbreviation section.
-///
-void DwarfDebug::emitAbbreviations() const {
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ DwarfAbbrevSectionSym);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ if (!useSplitDwarf())
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ &Abbreviations);
+ else
+ emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAbbrevs(const MCSection *Section,
+ std::vector<DIEAbbrev *> *Abbrevs) {
// Check to see if it is worth the effort.
- if (!Abbreviations.empty()) {
+ if (!Abbrevs->empty()) {
// Start the debug abbrev section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAbbrevSection());
+ Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+ MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
+ Asm->OutStreamer.EmitLabel(Begin);
    // For each abbreviation.
- for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
// Get abbreviation data
- const DIEAbbrev *Abbrev = Abbreviations[i];
+ const DIEAbbrev *Abbrev = Abbrevs->at(i);
    // Emit the abbreviation's code (base-1 index).
Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
@@ -1784,27 +1923,26 @@ void DwarfDebug::emitAbbreviations() const {
// Mark end of abbreviations.
Asm->EmitULEB128(0, "EOM(3)");
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+ MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
+ Asm->OutStreamer.EmitLabel(End);
}
}
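Every value in the abbreviation table, including the terminating zero emitted as "EOM(3)" above, is ULEB128-encoded. A minimal encoder for reference; this is the standard algorithm, shown here only as a sketch of what Asm->EmitULEB128 produces:

```cpp
#include <cstdint>
#include <vector>

void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits of the value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // high bit set: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}
```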
-/// emitEndOfLineMatrix - Emit the last address of the section and the end of
-/// the line matrix.
-///
+// Emit the last address of the section and the end of the line matrix.
void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
Asm->OutStreamer.AddComment("Extended Op");
Asm->EmitInt8(0);
Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getTargetData().getPointerSize(),
+ Asm->getDataLayout().getPointerSize(),
0/*AddrSpace*/);
// Mark end of matrix.
@@ -1814,8 +1952,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitAccelNames - Emit visible names into a hashed accelerator table
-/// section.
+// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1840,11 +1977,11 @@ void DwarfDebug::emitAccelNames() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelObjC - Emit objective C classes and categories into a hashed
-/// accelerator table section.
+// Emit Objective-C classes and categories into a hashed accelerator table
+// section.
void DwarfDebug::emitAccelObjC() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1869,11 +2006,10 @@ void DwarfDebug::emitAccelObjC() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelNamespace - Emit namespace dies into a hashed accelerator
-/// table.
+// Emit namespace DIEs into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1898,10 +2034,10 @@ void DwarfDebug::emitAccelNamespaces() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelTypes() - Emit type dies into a hashed accelerator table.
+// Emit type DIEs into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
std::vector<DwarfAccelTable::Atom> Atoms;
Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
@@ -1933,7 +2069,7 @@ void DwarfDebug::emitAccelTypes() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
void DwarfDebug::emitDebugPubTypes() {
@@ -1945,22 +2081,26 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->getObjFileLowering().getDwarfPubTypesSection());
Asm->OutStreamer.AddComment("Length of Public Types Info");
Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
- Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
- TheCU->getID()));
+ TheCU->getUniqueID()));
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
DwarfInfoSectionSym);
Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
- Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
+ TheCU->getUniqueID()),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
4);
const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
@@ -1980,27 +2120,28 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
- TheCU->getID()));
+ TheCU->getUniqueID()));
}
}
-/// emitDebugStr - Emit visible names into a debug str section.
-///
-void DwarfDebug::emitDebugStr() {
- // Check to see if it is worth the effort.
+// Emit strings into a string section.
+void DwarfUnits::emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection = NULL,
+ const MCSymbol *StrSecSym = NULL) {
+
if (StringPool.empty()) return;
// Start the dwarf str section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfStrSection());
+ Asm->OutStreamer.SwitchSection(StrSection);
// Get all of the string pool entries and put them in an array by their ID so
// we can sort them.
SmallVector<std::pair<unsigned,
- StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
- I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+ I = StringPool.begin(), E = StringPool.end();
+ I != E; ++I)
Entries.push_back(std::make_pair(I->second.second, &*I));
array_pod_sort(Entries.begin(), Entries.end());
@@ -2014,10 +2155,26 @@ void DwarfDebug::emitDebugStr() {
Entries[i].second->getKeyLength()+1),
0/*addrspace*/);
}
+
+ // If we've got an offset section go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm->OutStreamer.SwitchSection(OffsetSection);
+ unsigned offset = 0;
+ unsigned size = 4;
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitIntValue(offset, size, 0);
+ offset += Entries[i].second->getKeyLength() + 1;
+ }
+ }
+}
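The offset table emitted above for split DWARF is just a running sum: entry i holds the byte offset of string i in the string section, where each string occupies its length plus one NUL byte. A small model:

```cpp
#include <cstdint>
#include <string>
#include <vector>

std::vector<uint32_t> stringOffsets(const std::vector<std::string> &Pool) {
  std::vector<uint32_t> Offsets;
  uint32_t Offset = 0;
  for (const std::string &S : Pool) {
    Offsets.push_back(Offset);
    Offset += static_cast<uint32_t>(S.size()) + 1; // +1 for the NUL
  }
  return Offsets;
}
```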
+
+// Emit visible names into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
-/// emitDebugLoc - Emit visible names into a debug loc section.
-///
+// Emit visible names into a debug loc section.
void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
@@ -2033,7 +2190,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getTargetData().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2057,7 +2214,7 @@ void DwarfDebug::emitDebugLoc() {
if (Entry.isInt()) {
DIBasicType BTy(DV.getType());
if (BTy.Verify() &&
- (BTy.getEncoding() == dwarf::DW_ATE_signed
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
|| BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Asm->OutStreamer.AddComment("DW_OP_consts");
Asm->EmitInt8(dwarf::DW_OP_consts);
@@ -2068,7 +2225,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->EmitULEB128(Entry.getInt());
}
} else if (Entry.isLocation()) {
- if (!DV.hasComplexAddress())
+ if (!DV.hasComplexAddress())
// Regular entry.
Asm->EmitDwarfRegOp(Entry.Loc);
else {
@@ -2094,7 +2251,7 @@ void DwarfDebug::emitDebugLoc() {
} else {
Asm->EmitDwarfRegOp(Entry.Loc);
}
-
+
// Emit remaining complex address elements.
for (; i < N; ++i) {
uint64_t Element = DV.getAddrElement(i);
@@ -2116,21 +2273,19 @@ void DwarfDebug::emitDebugLoc() {
}
}
-/// EmitDebugARanges - Emit visible names into a debug aranges section.
-///
-void DwarfDebug::EmitDebugARanges() {
+// Emit visible names into a debug aranges section.
+void DwarfDebug::emitDebugARanges() {
// Start the dwarf aranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
}
-/// emitDebugRanges - Emit visible names into a debug ranges section.
-///
+// Emit visible names into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- unsigned char Size = Asm->getTargetData().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
for (SmallVector<const MCSymbol *, 8>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
@@ -2141,8 +2296,7 @@ void DwarfDebug::emitDebugRanges() {
}
}
-/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
-///
+// Emit visible names into a debug macinfo section.
void DwarfDebug::emitDebugMacInfo() {
if (const MCSection *LineInfo =
Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
@@ -2151,24 +2305,24 @@ void DwarfDebug::emitDebugMacInfo() {
}
}
-/// emitDebugInlineInfo - Emit inline info using following format.
-/// Section Header:
-/// 1. length of section
-/// 2. Dwarf version number
-/// 3. address size.
-///
-/// Entries (one "entry" for each function that was inlined):
-///
-/// 1. offset into __debug_str section for MIPS linkage name, if exists;
-/// otherwise offset into __debug_str for regular function name.
-/// 2. offset into __debug_str section for regular function name.
-/// 3. an unsigned LEB128 number indicating the number of distinct inlining
-/// instances for the function.
-///
-/// The rest of the entry consists of a {die_offset, low_pc} pair for each
-/// inlined instance; the die_offset points to the inlined_subroutine die in the
-/// __debug_info section, and the low_pc is the starting address for the
-/// inlining instance.
+// Emit inline info using the following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for MIPS linkage name, if exists;
+// otherwise offset into __debug_str for regular function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+// instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
void DwarfDebug::emitDebugInlineInfo() {
if (!Asm->MAI->doesDwarfUseInlineInfoSection())
return;
@@ -2188,7 +2342,7 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Dwarf Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2203,13 +2357,16 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("MIPS linkage name");
if (LName.empty())
- Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
else
- Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+ Asm->EmitSectionOffset(InfoHolder
+ .getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
Asm->OutStreamer.AddComment("Function name");
- Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
Asm->EmitULEB128(Labels.size(), "Inline count");
for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
@@ -2219,9 +2376,113 @@ void DwarfDebug::emitDebugInlineInfo() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getTargetData().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize(),0);
}
}
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
}
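As a reading aid, one entry of the section described in the comment block above, collected into a struct (illustrative only; the real emitter streams each field individually, and the instance count is ULEB128-encoded before the instances):

```cpp
#include <cstdint>
#include <vector>

struct InlineInstance {
  uint32_t DieOffset; // die_offset of the inlined_subroutine DIE
  uint64_t LowPC;     // starting address of this inlining instance
};

struct InlineEntry {
  uint32_t MipsLinkageNameOffset; // into __debug_str (or the plain name)
  uint32_t FunctionNameOffset;    // into __debug_str
  std::vector<InlineInstance> Instances; // count is a ULEB128 on disk
};
```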
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &SkeletonHolder);
+ // FIXME: This should be the .dwo file.
+ NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN);
+
+ // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id.
+
+ // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+ // into an entity.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+  // DW_AT_stmt_list is an offset into the line number information for this
+  // compile unit in the debug_line section.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("section_line"));
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+ if (!CompilationDir.empty())
+ NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ SkeletonHolder.addUnit(NewCU);
+
+ return NewCU;
+}
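Summarizing what the skeleton CU built above actually carries, versus what moves to the .dwo file. This is a descriptive struct, not LLVM code; per the FIXMEs above, the dwo_id and addr_base attributes are not emitted yet at this point in the patch:

```cpp
struct SkeletonCUSummary {
  const char *DwoName; // DW_AT_GNU_dwo_name: name of the .dwo file
  unsigned long LowPC; // DW_AT_low_pc: single entry point (0 for now)
  unsigned StmtList;   // DW_AT_stmt_list: offset into .debug_line
  const char *CompDir; // DW_AT_comp_dir: the compilation directory
  // The full DIE tree, its strings, and its abbrevs land in .dwo sections.
};
```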
+
+void DwarfDebug::emitSkeletonCU(const MCSection *Section) {
+ Asm->OutStreamer.SwitchSection(Section);
+ DIE *Die = SkeletonCU->getCUDie();
+
+  // Emit the compile unit's header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelBeginName(),
+ SkeletonCU->getUniqueID()));
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+
+ const MCSection *ASec = Asm->getObjFileLowering().getDwarfAbbrevSection();
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASec->getLabelBeginName()),
+ DwarfAbbrevSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+ emitDIE(Die, &SkeletonAbbrevs);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelEndName(),
+ SkeletonCU->getUniqueID()));
+}
+
+void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ emitAbbrevs(Section, &SkeletonAbbrevs);
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ DwarfAbbrevDWOSectionSym);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ &Abbreviations);
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ const MCSection *OffSec = Asm->getObjFileLowering()
+ .getDwarfStrOffDWOSection();
+ const MCSymbol *StrSym = DwarfStrSectionSym;
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec, StrSym);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index f94c9d0..1e471f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,15 +15,15 @@
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#include "DIE.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DebugLoc.h"
@@ -41,10 +41,10 @@ class DIEAbbrev;
class DIE;
class DIEBlock;
class DIEEntry;
+class DwarfDebug;
//===----------------------------------------------------------------------===//
-/// SrcLineInfo - This class is used to record source line correspondence.
-///
+/// \brief This class is used to record source line correspondence.
class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
@@ -61,8 +61,8 @@ public:
MCSymbol *getLabel() const { return Label; }
};
-/// DotDebugLocEntry - This struct describes location entries emitted in
-/// .debug_loc section.
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
typedef struct DotDebugLocEntry {
const MCSymbol *Begin;
const MCSymbol *End;
@@ -83,24 +83,25 @@ typedef struct DotDebugLocEntry {
const ConstantFP *CFP;
const ConstantInt *CIP;
} Constants;
- DotDebugLocEntry()
- : Begin(0), End(0), Variable(0), Merged(false),
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
Constant(false) { Constants.Int = 0;}
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
- const MDNode *V)
- : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
- DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
- /// Empty entries are also used as a trigger to emit temp label. Such
+ /// \brief Empty entries are also used as a trigger to emit temp label. Such
  /// labels are referenced to find the debug_loc offset for a given DIE.
bool isEmpty() { return Begin == 0 && End == 0; }
bool isMerged() { return Merged; }
@@ -120,8 +121,7 @@ typedef struct DotDebugLocEntry {
} DotDebugLocEntry;
//===----------------------------------------------------------------------===//
-/// DbgVariable - This class is used to track local variable information.
-///
+/// \brief This class is used to track local variable information.
class DbgVariable {
DIVariable Var; // Variable Descriptor.
DIE *TheDIE; // Variable DIE.
@@ -131,7 +131,7 @@ class DbgVariable {
int FrameIndex;
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV)
+ DbgVariable(DIVariable V, DbgVariable *AV)
: Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
FrameIndex(~0) {}
@@ -147,22 +147,30 @@ public:
void setMInsn(const MachineInstr *M) { MInsn = M; }
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
- // Translate tag to proper Dwarf tag.
- unsigned getTag() const {
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
-
+
return dwarf::DW_TAG_variable;
}
- /// isArtificial - Return true if DbgVariable is artificial.
+ /// \brief Return true if DbgVariable is artificial.
bool isArtificial() const {
if (Var.isArtificial())
return true;
- if (Var.getTag() == dwarf::DW_TAG_arg_variable
- && getType().isArtificial())
+ if (getType().isArtificial())
+ return true;
+ return false;
+ }
+
+ bool isObjectPointer() const {
+ if (Var.isObjectPointer())
+ return true;
+ if (getType().isObjectPointer())
return true;
return false;
}
+
bool variableHasComplexAddress() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.hasComplexAddress();
@@ -171,7 +179,7 @@ public:
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.isBlockByrefVariable();
}
- unsigned getNumAddrElements() const {
+ unsigned getNumAddrElements() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.getNumAddrElements();
}
@@ -181,108 +189,167 @@ public:
DIType getType() const;
};
+
+// A String->Symbol mapping of strings used by indirect
+// references.
+typedef StringMap<std::pair<MCSymbol*, unsigned>,
+ BumpPtrAllocator&> StrPool;
+
+/// \brief Collects and handles information specific to a particular
+/// collection of units.
+class DwarfUnits {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> *AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> *Abbreviations;
+
+  // All of the compile units in the section.
+ SmallVector<CompileUnit *, 1> CUs;
+
+ // Collection of strings for this unit and assorted symbols.
+ StrPool StringPool;
+ unsigned NextStringPoolNumber;
+ std::string StringPref;
+
+public:
+ DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
+ std::vector<DIEAbbrev *> *A, const char *Pref,
+ BumpPtrAllocator &DA) :
+ Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
+ StringPool(DA), NextStringPoolNumber(0), StringPref(Pref) {}
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Define a unique number for the abbreviation.
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+
+  /// \brief Emit all of the units to the given section, referencing the
+  /// given abbreviation section.
+ void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
+ const MCSymbol *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+
+  /// \brief Returns the symbol marking the start of the string pool.
+ MCSymbol *getStringPoolSym();
+
+ /// \brief Returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// \brief Returns the index into the string pool with the given
+ /// string text.
+ unsigned getStringPoolIndex(StringRef Str);
+
+ /// \brief Returns the string pool.
+ StrPool *getStringPool() { return &StringPool; }
+};
+
+/// \brief Collects and handles dwarf debug information.
class DwarfDebug {
- /// Asm - Target of Dwarf emission.
+ // Target of Dwarf emission.
AsmPrinter *Asm;
- /// MMI - Collected machine module information.
+ // Collected machine module information.
MachineModuleInfo *MMI;
- /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+ // All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
//===--------------------------------------------------------------------===//
- // Attributes used to construct specific Dwarf sections.
+  // Attributes used to construct specific Dwarf sections.
//
CompileUnit *FirstCU;
- /// Maps MDNode with its corresponding CompileUnit.
+ // Maps MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> CUMap;
- /// Maps subprogram MDNode with its corresponding CompileUnit.
+ // Maps subprogram MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> SPMap;
- /// AbbreviationsSet - Used to uniquely define abbreviations.
- ///
+ // Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
- /// Abbreviations - A list of all the unique abbreviations in use.
- ///
+ // A list of all the unique abbreviations in use.
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
- /// separated by a zero byte, mapped to a unique id.
+ // Source id map, i.e. pair of source filename and directory,
+ // separated by a zero byte, mapped to a unique id.
StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
- /// StringPool - A String->Symbol mapping of strings used by indirect
- /// references.
- StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
- unsigned NextStringPoolNumber;
-
- /// SectionMap - Provides a unique id per text section.
- ///
- UniqueVector<const MCSection*> SectionMap;
+ // Provides a unique id per text section.
+ SetVector<const MCSection*> SectionMap;
- /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+ // List of Arguments (DbgValues) for current function.
SmallVector<DbgVariable *, 8> CurrentFnArguments;
LexicalScopes LScopes;
- /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ // Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- /// ScopeVariables - Collection of dbg variables of a scope.
+ // Collection of dbg variables of a scope.
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection of abstract variables.
+ // Collection of abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
- /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ // Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
- /// (at the end of the module) as DW_AT_inline.
+ // Collection of subprogram DIEs that are marked (at the end of the module)
+ // as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
- /// InlineInfo - Keep track of inlined functions and their location. This
- /// information is used to populate the debug_inlined section.
+ // Keep track of inlined functions and their location. This
+ // information is used to populate the debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
- // ProcessedSPNodes - This is a collection of subprogram MDNodes that
- // are processed to create DIEs.
+ // This is a collection of subprogram MDNodes that are processed to
+ // create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
- /// LabelsBeforeInsn - Maps instruction with label emitted before
- /// instruction.
+  // Maps an instruction to the label emitted before it.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
- /// LabelsAfterInsn - Maps instruction with label emitted after
- /// instruction.
+  // Maps an instruction to the label emitted after it.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
- /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
- /// in order of appearance.
+ // Every user variable mentioned by a DBG_VALUE instruction in order of
+ // appearance.
SmallVector<const MDNode*, 8> UserVariables;
- /// DbgValues - For each user variable, keep a list of DBG_VALUE
- /// instructions in order. The list can also contain normal instructions that
- /// clobber the previous DBG_VALUE.
+ // For each user variable, keep a list of DBG_VALUE instructions in order.
+ // The list can also contain normal instructions that clobber the previous
+ // DBG_VALUE.
typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
DbgValueHistoryMap;
DbgValueHistoryMap DbgValues;
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
- /// Previous instruction's location information. This is used to determine
- /// label location to indicate scope boundries in dwarf debug info.
+ // Previous instruction's location information. This is used to determine
+  // label location to indicate scope boundaries in dwarf debug info.
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel;
- /// PrologEndLoc - This location indicates end of function prologue and
- /// beginning of function body.
+  // This location indicates the end of the function prologue and the
+  // beginning of the function body.
DebugLoc PrologEndLoc;
struct FunctionDebugFrameInfo {
@@ -302,177 +369,205 @@ class DwarfDebug {
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
// As an optimization, there is no need to emit an entry in the directory
  // table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // A holder for the DarwinGDBCompat flag so that the compile unit can use it.
- bool isDarwinGDBCompat;
- bool hasDwarfAccelTables;
-private:
+ // Counter for assigning globally unique IDs for CUs.
+ unsigned GlobalCUIndexCount;
- /// assignAbbrevNumber - Define a unique number for the abbreviation.
- ///
- void assignAbbrevNumber(DIEAbbrev &Abbrev);
+ // Holder for the file specific debug information.
+ DwarfUnits InfoHolder;
+
+ // Holders for the various debug information flags that we might need to
+ // have exposed. See accessor functions below for description.
+
+ // Whether or not we're emitting info for older versions of gdb on darwin.
+ bool IsDarwinGDBCompat;
+
+ // DWARF5 Experimental Options
+ bool HasDwarfAccelTables;
+ bool HasSplitDwarf;
+
+ // Separated Dwarf Variables
+ // In general these will all be for bits that are left in the
+ // original object file, rather than things that are meant
+ // to be in the .dwo sections.
+
+ // The CU left in the original object file for separated debug info.
+ CompileUnit *SkeletonCU;
+
+ // Used to uniquely define abbreviations for the skeleton emission.
+ FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> SkeletonAbbrevs;
+
+ // Holder for the skeleton information.
+ DwarfUnits SkeletonHolder;
+
+private:
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
- /// findAbstractVariable - Find abstract variable associated with Var.
+ /// \brief Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
- /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
- /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
- /// If there are global variables in this scope then create and insert
- /// DIEs for these variables.
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
- /// constructLexicalScope - Construct new DW_TAG_lexical_block
- /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructInlinedScopeDIE - This scope represents inlined body of
- /// a function. Construct DIE to represent this concrete inlined copy
- /// of the function.
+  /// \brief This scope represents the inlined body of a function. Construct
+  /// a DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructScopeDIE - Construct a DIE for this scope.
+ /// \brief Construct a DIE for this scope.
DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// EmitSectionLabels - Emit initial Dwarf sections with a label at
- /// the start of each one.
- void EmitSectionLabels();
+ /// \brief Emit initial Dwarf sections with a label at the start of each one.
+ void emitSectionLabels();
- /// emitDIE - Recursively Emits a debug information entry.
- ///
- void emitDIE(DIE *Die);
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
- /// computeSizeAndOffset - Compute the size and offset of a DIE.
- ///
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
-
- /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
- ///
+ /// \brief Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// EmitDebugInfo - Emit the debug info section.
- ///
+ /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ void computeInlinedDIEs();
+
+ /// \brief Collect info for variables that were optimized out.
+ void collectDeadVariables();
+
+ /// \brief Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// \brief Emit labels to close any remaining sections that have been left
+ /// open.
+ void endSections();
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *);
+
+ /// \brief Emit the debug info section.
void emitDebugInfo();
- /// emitAbbreviations - Emit the abbreviation section.
- ///
- void emitAbbreviations() const;
+ /// \brief Emit the abbreviation section.
+ void emitAbbreviations();
- /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// \brief Emit the last address of the section and the end of
/// the line matrix.
- ///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitAccelNames - Emit visible names into a hashed accelerator table
- /// section.
+ /// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
-
- /// emitAccelObjC - Emit objective C classes and categories into a hashed
+
+ /// \brief Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
- /// emitAccelNamespace - Emit namespace dies into a hashed accelerator
- /// table.
+ /// \brief Emit namespace dies into a hashed accelerator table.
void emitAccelNamespaces();
- /// emitAccelTypes() - Emit type dies into a hashed accelerator table.
- ///
+ /// \brief Emit type dies into a hashed accelerator table.
void emitAccelTypes();
-
- /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
- ///
+
+ /// \brief Emit visible types into a debug pubtypes section.
void emitDebugPubTypes();
- /// emitDebugStr - Emit visible names into a debug str section.
- ///
+ /// \brief Emit visible names into a debug str section.
void emitDebugStr();
- /// emitDebugLoc - Emit visible names into a debug loc section.
- ///
+ /// \brief Emit visible names into a debug loc section.
void emitDebugLoc();
- /// EmitDebugARanges - Emit visible names into a debug aranges section.
- ///
- void EmitDebugARanges();
+ /// \brief Emit visible names into a debug aranges section.
+ void emitDebugARanges();
- /// emitDebugRanges - Emit visible names into a debug ranges section.
- ///
+ /// \brief Emit visible names into a debug ranges section.
void emitDebugRanges();
- /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
- ///
+ /// \brief Emit visible names into a debug macinfo section.
void emitDebugMacInfo();
- /// emitDebugInlineInfo - Emit inline info using following format.
- /// Section Header:
- /// 1. length of section
- /// 2. Dwarf version number
- /// 3. address size.
- ///
- /// Entries (one "entry" for each function that was inlined):
- ///
- /// 1. offset into __debug_str section for MIPS linkage name, if exists;
- /// otherwise offset into __debug_str for regular function name.
- /// 2. offset into __debug_str section for regular function name.
- /// 3. an unsigned LEB128 number indicating the number of distinct inlining
- /// instances for the function.
- ///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
- /// inlined instance; the die_offset points to the inlined_subroutine die in
- /// the __debug_info section, and the low_pc is the starting address for the
- /// inlining instance.
+  /// \brief Emit inline info using a custom format.
void emitDebugInlineInfo();
- /// constructCompileUnit - Create new CompileUnit for the given
- /// metadata node with tag DW_TAG_compile_unit.
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// \brief Construct the split debug info compile unit for the debug info
+ /// section.
+ CompileUnit *constructSkeletonCU(const MDNode *);
+
+ /// \brief Emit the local split debug info section.
+ void emitSkeletonCU(const MCSection *);
+
+ /// \brief Emit the local split abbreviations.
+ void emitSkeletonAbbrevs(const MCSection *);
+
+ /// \brief Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// \brief Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// \brief Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// \brief Create new CompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
CompileUnit *constructCompileUnit(const MDNode *N);
- /// construct SubprogramDIE - Construct subprogram DIE.
+ /// \brief Construct subprogram DIE.
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
- /// recordSourceLine - Register a source line with debug info. Returns the
- /// unique label that was emitted and which provides correspondence to
- /// the source line list.
+ /// \brief Register a source line with debug info. Returns the unique
+ /// label that was emitted and which provides correspondence to the
+ /// source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
-
- /// identifyScopeMarkers() - Indentify instructions that are marking the
- /// beginning of or ending of a scope.
+
+  /// \brief Identify instructions that mark the beginning or ending of a
+  /// scope.
void identifyScopeMarkers();
- /// addCurrentFnArgument - If Var is an current function argument that add
- /// it in CurrentFnArguments list.
+  /// \brief If Var is a current function argument, add it to the
+  /// CurrentFnArguments list.
bool addCurrentFnArgument(const MachineFunction *MF,
DbgVariable *Var, LexicalScope *Scope);
- /// collectVariableInfo - Populate LexicalScope entries with variables' info.
+ /// \brief Populate LexicalScope entries with variables' info.
void collectVariableInfo(const MachineFunction *,
SmallPtrSet<const MDNode *, 16> &ProcessedVars);
-
- /// collectVariableInfoFromMMITable - Collect variable information from
- /// side table maintained by MMI.
+
+ /// \brief Collect variable information from the side table maintained
+ /// by MMI.
void collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &P);
- /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+ /// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
}
- /// getLabelBeforeInsn - Return Label preceding the instruction.
+ /// \brief Return Label preceding the instruction.
const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
- /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+ /// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
}
- /// getLabelAfterInsn - Return Label immediately following the instruction.
+ /// \brief Return Label immediately following the instruction.
const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
public:
@@ -482,52 +577,54 @@ public:
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
- /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
- /// as llvm.dbg.enum and llvm.dbg.ty
- void collectInfoFromNamedMDNodes(Module *M);
+ /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum
+ /// and llvm.dbg.ty
+ void collectInfoFromNamedMDNodes(const Module *M);
- /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+ /// \brief Collect debug info using DebugInfoFinder.
/// FIXME - Remove this when DragonEgg switches to DIBuilder.
- bool collectLegacyDebugInfo(Module *M);
+ bool collectLegacyDebugInfo(const Module *M);
- /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// \brief Emit all Dwarf sections that should come prior to the
/// content.
- void beginModule(Module *M);
+ void beginModule();
- /// endModule - Emit all Dwarf sections that should come after the content.
- ///
+ /// \brief Emit all Dwarf sections that should come after the content.
void endModule();
- /// beginFunction - Gather pre-function debug information. Assumes being
- /// emitted immediately after the function entry point.
+ /// \brief Gather pre-function debug information.
void beginFunction(const MachineFunction *MF);
- /// endFunction - Gather and emit post-function debug information.
- ///
+ /// \brief Gather and emit post-function debug information.
void endFunction(const MachineFunction *MF);
- /// beginInstruction - Process beginning of an instruction.
+ /// \brief Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI);
- /// endInstruction - Prcess end of an instruction.
+ /// \brief Process end of an instruction.
void endInstruction(const MachineInstr *MI);
- /// GetOrCreateSourceID - Look up the source id with the given directory and
- /// source file names. If none currently exists, create a new id and insert it
- /// in the SourceIds map.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
-
- /// getStringPool - returns the entry into the start of the pool.
- MCSymbol *getStringPool();
+ /// \brief Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map.
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName);
- /// getStringPoolEntry - returns an entry into the string pool with the given
- /// string text.
- MCSymbol *getStringPoolEntry(StringRef Str);
+ /// \brief Recursively Emits a debug information entry.
+ void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
- /// useDarwinGDBCompat - returns whether or not to limit some of our debug
+ /// \brief Returns whether or not to limit some of our debug
/// output to the limitations of darwin gdb.
- bool useDarwinGDBCompat() { return isDarwinGDBCompat; }
- bool useDwarfAccelTables() { return hasDwarfAccelTables; }
+ bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+ // Experimental DWARF5 features.
+
+ /// \brief Returns whether or not to emit tables that dwarf consumers can
+ /// use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+
+ /// \brief Returns whether or not to change the current debug info for the
+ /// split dwarf proposal support.
+ bool useSplitDwarf() { return HasSplitDwarf; }
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 70cc2e5..975bb94 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -12,30 +12,30 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
DwarfException::DwarfException(AsmPrinter *A)
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
// that we're omitting that bit.
TTypeEncoding = dwarf::DW_EH_PE_omit;
// dwarf::DW_EH_PE_absptr
- TypeFormatSize = Asm->getTargetData().getPointerSize();
+ TypeFormatSize = Asm->getDataLayout().getPointerSize();
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
// pick a type encoding for them. We're about to emit a list of pointers to
@@ -672,6 +672,18 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
+ EmitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
+
+void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
@@ -684,11 +696,7 @@ void DwarfException::EmitExceptionTable() {
const GlobalVariable *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
- if (GV)
- Asm->EmitReference(GV, TTypeEncoding);
- else
- Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
- 0);
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
}
// Emit the Exception Specifications.
@@ -708,8 +716,6 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitULEB128(TypeID);
}
-
- Asm->EmitAlignment(2);
}
/// EndModule - Emit all exception information that should come after the
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index fe9e493..74b1b13 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -121,6 +121,8 @@ protected:
 /// catches in the function. This table is reverse indexed base 1.
void EmitExceptionTable();
+ virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -175,6 +177,7 @@ public:
};
class ARMException : public DwarfException {
+ void EmitTypeInfos(unsigned TTypeEncoding);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 1153817..b802853 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index b83aa5a..cb25674 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -12,30 +12,30 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
Win64Exception::Win64Exception(AsmPrinter *A)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 0000000..c27e081
--- /dev/null
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,383 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basictti"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+class BasicTTI : public ImmutablePass, public TargetTransformInfo {
+ const TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ BasicTTI() : ImmutablePass(ID), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ BasicTTI(const TargetLowering *TLI) : ImmutablePass(ID), TLI(TLI) {
+ initializeBasicTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual bool isLegalAddImmediate(int64_t imm) const;
+ virtual bool isLegalICmpImmediate(int64_t imm) const;
+ virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTypeLegal(Type *Ty) const;
+ virtual unsigned getJumpBufAlignment() const;
+ virtual unsigned getJumpBufSize() const;
+ virtual bool shouldBuildLookupTables() const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCFInstrCost(unsigned Opcode) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+ virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+ ArrayRef<Type*> Tys) const;
+ virtual unsigned getNumberOfParts(Type *Tp) const;
+
+ /// @}
+};
+
+}
+
+INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
+ "Target independent code generator's TTI", true, true, false)
+char BasicTTI::ID = 0;
+
+ImmutablePass *
+llvm::createBasicTargetTransformInfoPass(const TargetLowering *TLI) {
+ return new BasicTTI(TLI);
+}
+
+bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const {
+ TargetLowering::AddrMode AM;
+ AM.BaseGV = BaseGV;
+ AM.BaseOffs = BaseOffset;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Scale;
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool BasicTTI::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned BasicTTI::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned BasicTTI::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
+
+bool BasicTTI::shouldBuildLookupTables() const {
+ return TLI->supportJumpTables() &&
+ (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) const {
+  assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
+unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
+ return 1;
+}
+
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+ // Check if any of the operands are vector operands.
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+    // If the type is split into multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    // Return the cost of multiple scalar invocations plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
+
+unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return 1;
+}
+
+unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+
+ // Handle scalar conversions.
+ if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+ // Scalar bitcasts are usually free.
+ if (Opcode == Instruction::BitCast)
+ return 0;
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // Just check the op cost. If the operation is legal then assume it costs 1.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return 1;
+
+    // Assume that illegal scalar instructions are expensive.
+ return 4;
+ }
+
+ // Check vector-to-vector casts.
+ if (Dst->isVectorTy() && Src->isVectorTy()) {
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Bitcast between types that are legalized to the same type are free.
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+
+ // Assume that Zext is done using AND.
+ if (Opcode == Instruction::ZExt)
+ return 1;
+
+ // Assume that sext is done using SHL and SRA.
+ if (Opcode == Instruction::SExt)
+ return 2;
+
+ // Just check the op cost. If the operation is legal then assume it costs
+ // 1 and multiply by the type-legalization overhead.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return SrcLT.first * 1;
+ }
+
+ // If we are converting vectors and the operation is illegal, or
+ // if the vectors are legalized to different types, estimate the
+ // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+
+    // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // We already handled vector-to-vector and scalar-to-scalar conversions. This
+ // is where we handle bitcast between vectors and scalars. We need to assume
+ // that the conversion is scalarized in one way or another.
+ if (Opcode == Instruction::BitCast)
+ // Illegal bitcasts are done by storing and loading from a stack slot.
+ return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
+ (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
+
+ llvm_unreachable("Unhandled cast");
+}
+
+unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
+unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+  // Otherwise, assume that the cmp/select is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+    // Return the cost of multiple scalar invocations plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return 1;
+}
+
+unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ assert(!Src->isVoidTy() && "Invalid type");
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+  // Assume that loads and stores of legal types cost 1 per legalized part.
+ return LT.first;
+}
+
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+  // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+      ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+ }
+ }
+ return ScalarCalls + ScalarizationCost;
+}
+
+unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ return LT.first;
+}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7df0e15..f8cc3b3 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -18,24 +18,23 @@
#define DEBUG_TYPE "branchfolding"
#include "BranchFolding.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -357,9 +356,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
while (I2->isDebugValue()) {
- if (I2 == MBB2->begin()) {
+ if (I2 == MBB2->begin())
return TailLen;
- }
--I2;
}
++I2;
@@ -482,21 +480,19 @@ bool
BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
if (getHash() < o.getHash())
return true;
- else if (getHash() > o.getHash())
+ if (getHash() > o.getHash())
return false;
- else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
return true;
- else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
return false;
- else {
- // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
- // an object with itself.
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
#ifndef _GLIBCXX_DEBUG
- llvm_unreachable("Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#else
- return false;
+ return false;
#endif
- }
}
/// CountTerminators - Count the number of terminators in the given
@@ -574,7 +570,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 386509b..d5f3932 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
AllocationOrder.cpp
Analysis.cpp
+ BasicTargetTransformInfo.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
CallingConvLower.cpp
@@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen
MachineCopyPropagation.cpp
MachineCSE.cpp
MachineDominators.cpp
+ MachinePostDominators.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
MachineFunctionPass.cpp
@@ -53,7 +55,6 @@ add_llvm_library(LLVMCodeGen
MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
- MachineLoopRanges.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
@@ -99,9 +100,11 @@ add_llvm_library(LLVMCodeGen
StrongPHIElimination.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp
- TargetInstrInfoImpl.cpp
+ TargetInstrInfo.cpp
TargetLoweringObjectFileImpl.cpp
TargetOptionsImpl.cpp
+ TargetRegisterInfo.cpp
+ TargetSchedule.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index bc5258e..dee339a 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -164,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
continue;
float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && LIS.isAllocatable(hint))
+ if (hweight > bestPhys && mri.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
} else {
if (hweight > bestVirt)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 0b747fd..f1d4ace 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -14,13 +14,13 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -74,7 +74,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -106,7 +106,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -124,7 +124,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -143,7 +143,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -160,7 +160,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << "\n";
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -173,7 +173,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 65f0941..a33b672 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -19,6 +19,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
initializeCodePlacementOptPass(Registry);
@@ -41,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCopyPropagationPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 99233df..2451844 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLoopsAligned, "Number of loops aligned");
@@ -373,7 +373,8 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
///
bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
const Function *F = MF.getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
return false;
unsigned Align = TLI->getPrefLoopAlignment();
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a9de1c7..48105d9 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -17,12 +17,12 @@
#include "CriticalAntiDepBreaker.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -378,7 +378,7 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
unsigned LastNewReg,
const TargetRegisterClass *RC)
{
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
for (unsigned i = 0; i != Order.size(); ++i) {
unsigned NewReg = Order[i];
// Don't replace a register with itself.
@@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg))
+ if (!MRI.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
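
The getOrder() change above only tightens the element type to MCPhysReg; the scan over the allocation order is unchanged. A standalone sketch of that scan (names hypothetical), picking the first free register other than the one being replaced:

    #include <cstdint>
    #include <set>
    #include <vector>

    typedef uint16_t PhysReg; // stand-in for MCPhysReg, a 16-bit reg number

    // Return the first register in the class's allocation order that is free
    // and different from OldReg; 0 signals that no suitable register exists.
    PhysReg findFreeReg(const std::vector<PhysReg> &Order,
                        const std::set<PhysReg> &InUse, PhysReg OldReg) {
      for (size_t i = 0, e = Order.size(); i != e; ++i) {
        PhysReg NewReg = Order[i];
        if (NewReg == OldReg || InUse.count(NewReg))
          continue; // don't replace a register with itself or a taken one
        return NewReg;
      }
      return 0;
    }
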
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index ad95c48..8fb2b0e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,13 +17,13 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/BitVector.h"
#include <map>
namespace llvm {
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index ff2f113..840a101 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,12 +23,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index b4394e8..a526d24 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -13,14 +13,14 @@
#define DEBUG_TYPE "codegen-dce"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumDeletes, "Number of dead instructions deleted");
@@ -33,7 +33,6 @@ namespace {
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
- BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
if (!MRI->use_nodbg_empty(Reg))
@@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Treat reserved registers as always live.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
// Start out assuming that reserved registers are live out of this block.
- LivePhysRegs = ReservedRegs;
+ LivePhysRegs = MRI->getReservedRegs();
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().isReturn())
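
With the per-pass ReservedRegs copy gone, the reserved set now comes straight from MachineRegisterInfo but still seeds the live set at the bottom of each block. A standalone sketch of that bottom-up scan, using a toy instruction model rather than the pass itself:

    #include <set>
    #include <vector>

    struct Inst {
      int Def;            // register defined (0 = none)
      std::set<int> Uses; // registers read
      bool Dead;
    };

    // Bottom-up liveness scan: a side-effect-free def is deletable only if the
    // register is neither live below this point nor reserved -- reserved
    // registers are treated as always live, as MRI->isReserved() does above.
    void markDead(std::vector<Inst> &Block, const std::set<int> &Reserved) {
      std::set<int> Live(Reserved); // reserved regs are live out of the block
      for (int i = (int)Block.size() - 1; i >= 0; --i) {
        Inst &I = Block[i];
        I.Dead = I.Def != 0 && !Live.count(I.Def);
        if (I.Dead)
          continue;          // a dead def's uses never become live
        if (I.Def)
          Live.erase(I.Def); // the def ends liveness above it
        Live.insert(I.Uses.begin(), I.Uses.end());
      }
    }
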
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7095624..4cafa96 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dwarfehprepare"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index c40c5ac..f332925 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "early-ifcvt"
+#include "llvm/CodeGen/Passes.h"
#include "MachineTraceMetrics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -30,13 +31,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -777,7 +777,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
- SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
+ SchedModel =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -796,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
if (tryConvertIf(I->getBlock()))
Changed = true;
- MF.verify(this, "After early if-conversion");
return Changed;
}
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index fee8e47..9b0e76f 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -21,15 +21,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "execution-fix"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
@@ -626,9 +626,12 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
dv->Instrs.push_back(mi);
- // Finally set all defs and non-collapsed uses to dv.
- for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operands, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
int rx = regIndex(mo.getReg());
if (rx < 0) continue;
@@ -654,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
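
The loop above used to bound itself by getDesc().getNumOperands(), which counts only the operands the static instruction description declares; implicit defs appended after them were silently skipped. A standalone sketch of the difference, on a toy operand model:

    #include <cstddef>
    #include <vector>

    struct Operand { int Reg; bool IsImplicit; };

    struct Instr {
      std::vector<Operand> Ops; // explicit operands first, implicit appended
      std::size_t NumExplicit;  // what the static descriptor would report
    };

    // Bounding the walk by NumExplicit (the old loop) misses the appended
    // implicit operands; walking Ops.size() entries (the new loop) does not.
    int countRegOperands(const Instr &MI, bool IncludeImplicit) {
      std::size_t E = IncludeImplicit ? MI.Ops.size() : MI.NumExplicit;
      int N = 0;
      for (std::size_t i = 0; i != E; ++i)
        if (MI.Ops[i].Reg != 0)
          ++N;
      return N;
    }
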
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 2c4a935..b2b6882 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -15,12 +15,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index ffe4b63..0b9e83d 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -18,11 +18,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1caf8c2..a6a06e4 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -14,10 +14,10 @@
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Pass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 506b5cf..1173d11 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -16,21 +16,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
+#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -387,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
const TargetFrameLowering *TFI = TM->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
- for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
- RE = FI->roots_end(); RI != RE; ++RI)
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
}
bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
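
The new FindStackOffsets loop interleaves deletion with iteration, which only works because removeStackRoot hands back the iterator to the element after the erased one. The same contract in a standalone sketch built on std::vector::erase:

    #include <vector>

    struct Root { unsigned FrameIdx; int StackOffset; };

    // Drop roots whose frame object is dead and assign offsets to the rest.
    // erase() returns the iterator to the next element, so each pass through
    // the loop advances either via erase() or via ++RI, never both.
    void assignOffsets(std::vector<Root> &Roots, const std::vector<bool> &Dead) {
      for (std::vector<Root>::iterator RI = Roots.begin(); RI != Roots.end();) {
        if (Dead[RI->FrameIdx]) {
          RI = Roots.erase(RI);
        } else {
          RI->StackOffset = 16 * (int)RI->FrameIdx; // stand-in frame lookup
          ++RI;
        }
      }
    }
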
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 31e36f0..8906991 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -12,24 +12,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ifcvt"
-#include "BranchFolding.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
// Hidden options for help debugging.
@@ -994,14 +995,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Redefs.erase(*SubRegs);
}
}
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
unsigned Reg = Defs[i];
if (!Redefs.insert(Reg)) {
if (AddImpUse)
// Treat predicated update as read + write.
- MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
- true/*IsImp*/,false/*IsKill*/,
- false/*IsDead*/,true/*IsUndef*/));
+ MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
} else {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Redefs.insert(*SubRegs);
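
MachineInstrBuilder::addReg collapses the five-boolean CreateReg constructor above into one call taking or-able RegState bits. The flag pattern in a standalone sketch (the enum values here are hypothetical):

    #include <cassert>

    enum RegFlag { // or-able operand-state bits, in the spirit of RegState
      Define   = 1 << 0,
      Implicit = 1 << 1,
      Kill     = 1 << 2,
      Undef    = 1 << 3
    };

    struct OperandState { unsigned Reg; unsigned Flags; };

    OperandState makeReg(unsigned Reg, unsigned Flags) {
      OperandState S = { Reg, Flags };
      return S;
    }

    int main() {
      // One call with composed flags instead of a list of positional bools.
      OperandState S = makeReg(5, Implicit | Undef);
      assert((S.Flags & Undef) && !(S.Flags & Define));
      return 0;
    }
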
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 622127c..c6d1a18 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -22,16 +21,17 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::RegInfo RI =
+ MIBundleOperands::VirtRegInfo RI =
MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
@@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::RegInfo RI =
+ MIBundleOperands::VirtRegInfo RI =
MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 1541bf0..a8e711e 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "regalloc"
#include "InterferenceCache.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 3c928a5..c02fb9a 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_INTERFERENCECACHE
#define LLVM_CODEGEN_INTERFERENCECACHE
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
namespace llvm {
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 8d2282a..16e7968 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
template <class ArgIt>
@@ -413,22 +413,30 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
}
case Intrinsic::stacksave:
- case Intrinsic::stackrestore: {
if (!Warned)
- errs() << "WARNING: this target does not support the llvm.stack"
- << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
- "save" : "restore") << " intrinsic.\n";
+ Context.emitWarning("this target does not support the "
+ "llvm.stacksave intrinsic");
+ Warned = true;
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::stackrestore:
+ if (!Warned)
+ Context.emitWarning("this target does not support the "
+ "llvm.stackrestore intrinsic");
Warned = true;
- if (Callee->getIntrinsicID() == Intrinsic::stacksave)
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
- }
case Intrinsic::returnaddress:
+ Context.emitWarning("this target does not support the "
+ "llvm.returnaddress intrinsic");
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
case Intrinsic::frameaddress:
- errs() << "WARNING: this target does not support the llvm."
- << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
- "return" : "frame") << "address intrinsic.\n";
+ Context.emitWarning("this target does not support the "
+ "llvm.frameaddress intrinsic");
CI->replaceAllUsesWith(ConstantPointerNull::get(
cast<PointerType>(CI->getType())));
break;
@@ -438,12 +446,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::pcmarker:
break; // Simply strip out pcmarker on unsupported architectures
- case Intrinsic::readcyclecounter: {
- errs() << "WARNING: this target does not support the llvm.readcyclecoun"
- << "ter intrinsic. It is being lowered to a constant 0\n";
+ case Intrinsic::readcyclecounter:
+ Context.emitWarning("this target does not support the "
+ "llvm.readcyclecounter intrinsic; "
+ "it is being lowered to a constant 0");
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
- }
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
@@ -457,7 +465,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Strip out annotate intrinsic
case Intrinsic::memcpy: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -468,7 +476,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memmove: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -479,7 +487,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
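
Each unsupported intrinsic now gets its own case, but they share one shape: warn once, then rewrite every occurrence to a safe null value of the result type. That shape in a standalone sketch, with a plain stderr warning standing in for the context diagnostic:

    #include <cstdio>

    // Warn the first time an unsupported operation is lowered, then keep
    // rewriting silently; every call still yields a safe default value.
    int lowerUnsupported(const char *Name, bool &Warned) {
      if (!Warned)
        std::fprintf(stderr, "warning: target does not support %s\n", Name);
      Warned = true;
      return 0; // stand-in for replaceAllUsesWith(<null of result type>)
    }

    int main() {
      bool Warned = false;
      lowerUnsupported("llvm.stacksave", Warned); // warns
      lowerUnsupported("llvm.stacksave", Warned); // silent
      return 0;
    }
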
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index cac0c83..12cd2d1 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,30 +11,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
// Enable or disable FastISel. Both options are needed, because
@@ -79,6 +79,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
+void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+}
+
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassManagerBase &PM,
@@ -96,6 +100,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassConfig->addIRPasses();
+ PassConfig->addCodeGenPrepare();
+
PassConfig->addPassesToHandleExceptions();
PassConfig->addISelPrepare();
@@ -172,7 +178,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
*Context);
- MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
}
MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
@@ -191,7 +197,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
STI, *Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
@@ -199,7 +206,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
*Context, *MAB, Out,
MCE, hasMCRelaxAll(),
hasMCNoExecStack()));
- AsmStreamer.get()->InitSections();
+ AsmStreamer.get()->setAutoInitSections(true);
break;
}
case CGFT_Null:
@@ -266,7 +273,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
STI, *Ctx);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 6b6b9d0..3c01d91 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -16,10 +16,10 @@
#define DEBUG_TYPE "lexicalscopes"
#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index defc127..786f353 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -21,11 +21,6 @@
#define DEBUG_TYPE "livedebug"
#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Metadata.h"
-#include "llvm/Value.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -35,6 +30,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 3e9b485..68f4b16 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,15 +19,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "RegisterCoalescer.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "RegisterCoalescer.h"
#include <algorithm>
using namespace llvm;
@@ -59,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
return VNI;
}
if (SlotIndex::isSameInstr(Def, I->start)) {
- assert(I->start == Def && "Cannot insert def, already live");
- assert(I->valno->def == Def && "Inconsistent existing value def");
+ assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, I->start);
+ if (Def != I->start)
+ I->start = I->valno->def = Def;
return I->valno;
}
assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
@@ -69,21 +77,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
return VNI;
}
-/// killedInRange - Return true if the interval has kills in [Start,End).
-bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
- Ranges::const_iterator r =
- std::lower_bound(ranges.begin(), ranges.end(), End);
-
- // Now r points to the first interval with start >= End, or ranges.end().
- if (r == ranges.begin())
- return false;
-
- --r;
- // Now r points to the last interval with end <= End.
- // r->end is the kill point.
- return r->end >= Start && r->end < End;
-}
-
// overlaps - Return true if the intersection of the two live intervals is
// not empty.
//
@@ -442,7 +435,7 @@ void LiveInterval::join(LiveInterval &Other,
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
- if (MustMapCurValNos) {
+ if (MustMapCurValNos && !empty()) {
// Map the first live range.
iterator OutIt = begin();
@@ -716,27 +709,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
return V2;
}
-void LiveInterval::Copy(const LiveInterval &RHS,
- MachineRegisterInfo *MRI,
- VNInfo::Allocator &VNInfoAllocator) {
- ranges.clear();
- valnos.clear();
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
- MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
-
- weight = RHS.weight;
- for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
- const VNInfo *VNI = RHS.getValNumInfo(i);
- createValueCopy(VNI, VNInfoAllocator);
- }
- for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
- const LiveRange &LR = RHS.ranges[i];
- addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
- }
-
- verify();
-}
-
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const_iterator I = begin(), E = end(); I != E; ++I)
@@ -748,7 +720,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveRange::dump() const {
dbgs() << *this << "\n";
}
@@ -785,7 +757,7 @@ void LiveInterval::print(raw_ostream &OS) const {
}
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
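
The two dump() hunks above switch from a plain NDEBUG guard to one that also honors LLVM_ENABLE_DUMP, so release builds can opt back into the dump methods at configure time. The guard in a minimal sketch:

    #include <cstdio>

    struct Range {
      int Start, End;
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Compiled into debug builds, and into release builds only when
      // LLVM_ENABLE_DUMP is defined.
      void dump() const { std::printf("[%d,%d)\n", Start, End); }
    #endif
    };
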
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 17f9d9e..4198457 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -17,27 +17,27 @@
#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Value.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "LiveRangeCalc.h"
-#include "VirtRegMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
-#include <limits>
#include <cmath>
+#include <limits>
using namespace llvm;
// Switch to the new experimental algorithm for computing live intervals.
@@ -110,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
LRCalc = new LiveRangeCalc();
- AllocatableRegs = TRI->getAllocatableSet(fn);
- ReservedRegs = TRI->getReservedRegs(fn);
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -148,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
}
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
printInstrs(OS);
}
@@ -156,7 +159,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
MF->print(OS, Indexes);
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
@@ -542,11 +545,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
// Ignore uses of reserved registers. We only track defs of those.
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
unsigned Root = *Roots;
- if (!isReserved(Root) && !MRI->reg_empty(Root))
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
LRCalc->extendToUses(LI, Root);
for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
unsigned Reg = *Supers;
- if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
LRCalc->extendToUses(LI, Reg);
}
}
@@ -731,6 +734,73 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
return CanSeparate;
}
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveRangeQuery LRQ(*LI, Kill);
+ VNInfo *VNI = LRQ.valueOut();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBStart, MBBEnd;
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LI->removeRange(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
//===----------------------------------------------------------------------===//
// Register allocator hooks.
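
pruneValue's traversal leans on df_ext_iterator: a depth-first walk over block successors with an external visited set, where skipChildren() refuses to descend once the value stops being live. The same control flow in a standalone sketch over a toy CFG (LiveIn stands in for "the value is live into this block"):

    #include <set>
    #include <vector>

    // Depth-first walk from Start that prunes whole subtrees: blocks where
    // the value is not live-in are skipped along with their unvisited
    // successors, mirroring df_ext_iterator's skipChildren().
    void pruneFrom(int Start, const std::vector<std::vector<int> > &Succs,
                   const std::vector<bool> &LiveIn, std::set<int> &Pruned) {
      std::set<int> Visited;          // the external visited set
      std::vector<int> Stack(1, Start);
      while (!Stack.empty()) {
        int MBB = Stack.back();
        Stack.pop_back();
        if (!Visited.insert(MBB).second)
          continue;                   // already handled
        if (!LiveIn[MBB])
          continue;                   // not live in: don't push children
        Pruned.insert(MBB);           // remove the range covering this block
        Stack.insert(Stack.end(), Succs[MBB].begin(), Succs[MBB].end());
      }
    }
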
@@ -943,497 +1013,325 @@ private:
LiveIntervals& LIS;
const MachineRegisterInfo& MRI;
const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
SlotIndex NewIdx;
-
- typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
- typedef DenseSet<IntRangePair> RangeSet;
-
- struct RegRanges {
- LiveRange* Use;
- LiveRange* EC;
- LiveRange* Dead;
- LiveRange* Def;
- RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
- };
- typedef DenseMap<unsigned, RegRanges> BundleRanges;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
public:
HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
- const TargetRegisterInfo& TRI, SlotIndex NewIdx)
- : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
- // Update intervals for all operands of MI from OldIdx to NewIdx.
- // This assumes that MI used to be at OldIdx, and now resides at
- // NewIdx.
- void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
- assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
- // Collect the operands.
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
- // To keep the LiveRanges valid within an interval, move the ranges closest
- // to the destination first. This prevents ranges from overlapping, to that
- // APIs like removeRange still work.
- if (NewIdx < OldIdx) {
- moveAllEnteringFrom(OldIdx, Entering);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllExitingFrom(OldIdx, Exiting);
- }
- else {
- moveAllExitingFrom(OldIdx, Exiting);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllEnteringFrom(OldIdx, Entering);
- }
-
- if (hasRegMaskOp)
- updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
}
- // Update intervals for all operands of MI to refer to BundleStart's
- // SlotIndex.
- void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
- if (MI == BundleStart)
- return; // Bundling instr with itself - nothing to do.
-
- SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
- assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
- "SlotIndex <-> Instruction mapping broken for MI");
-
- // Collect all ranges already in the bundle.
- MachineBasicBlock::instr_iterator BII(BundleStart);
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
- if (&*BII == MI)
- continue;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- }
-
- BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
- Entering.clear();
- Internal.clear();
- Exiting.clear();
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-
- DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
- DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
- DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
- moveAllEnteringFromInto(OldIdx, Entering, BR);
- moveAllInternalFromInto(OldIdx, Internal, BR);
- moveAllExitingFromInto(OldIdx, Exiting, BR);
-
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
- }
-
-private:
-
-#ifndef NDEBUG
- class LIValidator {
- private:
- DenseSet<const LiveInterval*> Checked, Bogus;
- public:
- void operator()(const IntRangePair& P) {
- const LiveInterval* LI = P.first;
- if (Checked.count(LI))
- return;
- Checked.insert(LI);
- if (LI->empty())
- return;
- SlotIndex LastEnd = LI->begin()->start;
- for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
- LRI != LRE; ++LRI) {
- const LiveRange& LR = *LRI;
- if (LastEnd > LR.start || LR.start >= LR.end)
- Bogus.insert(LI);
- LastEnd = LR.end;
- }
- }
-
- bool rangesOk() const {
- return Bogus.empty();
- }
- };
-#endif
-
- // Collect IntRangePairs for all operands of MI that may need fixing.
- // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes'
- // maps).
- void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
- RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
- hasRegMaskOp = false;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
-
- if (MO.isRegMask()) {
- hasRegMaskOp = true;
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
continue;
- }
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
- if (!MO.isReg() || MO.getReg() == 0)
+ unsigned Reg = MO->getReg();
+ if (!Reg)
continue;
-
- unsigned Reg = MO.getReg();
-
- // TODO: Currently we're skipping uses that are reserved or have no
- // interval, but we're not updating their kills. This should be
- // fixed.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
continue;
-
- // Collect ranges for register units. These live ranges are computed on
- // demand, so just skip any that haven't been computed yet.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
- collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx);
- } else {
- // Collect ranges for individual virtual registers.
- collectRanges(MO, &LIS.getInterval(Reg),
- Entering, Internal, Exiting, OldIdx);
}
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
}
+ if (hasRegMask)
+ updateRegMaskSlots();
}
- void collectRanges(const MachineOperand &MO, LiveInterval *LI,
- RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
- SlotIndex OldIdx) {
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No live range for def?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
else
- Internal.insert(std::make_pair(LI, LR));
- }
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
}
- BundleRanges createBundleRanges(RangeSet& Entering,
- RangeSet& Internal,
- RangeSet& Exiting) {
- BundleRanges BR;
-
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Use = LR;
- }
+ /// Update LI to reflect that an instruction has been moved downwards from
+ /// OldIdx to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II) {
- LiveInterval* LI = II->first;
- LiveRange* LR = II->second;
- if (LR->end.isDead()) {
- BR[LI->reg].Dead = LR;
- } else {
- BR[LI->reg].EC = LR;
- }
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // overlapping ranges. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
+ return;
+ ++I;
}
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Def = LR;
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
}
-
- return BR;
- }
-
- void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
- MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
- if (!OldKillMI->killsRegister(reg))
- return; // Bail out if we don't have kill flags on the old register.
- MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
- assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) &&
- "New kill instr is already a kill.");
- OldKillMI->clearRegisterKills(reg, &TRI);
- NewKillMI->addRegisterKilled(reg, &TRI);
- }
-
- void updateRegMaskSlots(SlotIndex OldIdx) {
- SmallVectorImpl<SlotIndex>::iterator RI =
- std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
- OldIdx);
- assert(*RI == OldIdx && "No RegMask at OldIdx.");
- *RI = NewIdx;
- assert(*prior(RI) < *RI && *RI < *next(RI) &&
- "RegSlots out of order. Did you move one call across another?");
- }
-
- // Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
- SlotIndex LastUse = NewIdx;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI.use_nodbg_begin(Reg),
- UE = MRI.use_nodbg_end();
- UI != UE; UI.skipInstruction()) {
- const MachineInstr* MI = &*UI;
- SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
- if (InstSlot > LastUse && InstSlot < OldIdx)
- LastUse = InstSlot;
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
}
- return LastUse;
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // values. The new range should be placed immediately before NewI; move any
+ // intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough)
+ /// Update LI to reflect that an instruction has been moved upwards from
+ /// OldIdx to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
return;
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- if (LastUse != NewIdx)
- moveKillFlags(LI->reg, NewIdx, LastUse);
- LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber());
- }
- void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- // Extend the LiveRange if NewIdx is past the end.
- if (NewIdx > LR->end) {
- // Move kill flags if OldIdx was not originally the end
- // (otherwise LR->end points to an invalid slot).
- if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
- assert(LR->end > OldIdx && "LiveRange does not cover original slot");
- moveKillFlags(LI->reg, LR->end, NewIdx);
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
}
- LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber());
}
- }
-
- void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFrom(OldIdx, *EI);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFrom(OldIdx, *EI);
- }
- }
-
- void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- LR->end <= OldIdx.getDeadSlot() &&
- "Range should be internal to OldIdx.");
- LiveRange Tmp(*LR);
- Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- Tmp.valno->def = Tmp.start;
- Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
- LI->removeRange(*LR);
- LI->addRange(Tmp);
- }
-
- void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFrom(OldIdx, *II);
- }
-
- void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- "Range should start in OldIdx.");
- assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
- SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- LR->start = NewStart;
- LR->valno->def = NewStart;
- }
-
- void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFrom(OldIdx, *EI);
- }
- void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough) {
- assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
- "Def in bundle should be def range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "If bundle has use for this reg it should be LR.");
- BR[LI->reg].Use = LR;
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
return;
}
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- moveKillFlags(LI->reg, OldIdx, LastUse);
-
- if (LR->start < NewIdx) {
- // Becoming a new entering range.
- assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
- "Bundle shouldn't be re-defining reg mid-range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "Bundle shouldn't have different use range for same reg.");
- LR->end = LastUse.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- // Becoming a new Dead-def.
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
- "Live range starting at unexpected slot.");
- assert(BR[LI->reg].Def == LR && "Reg should have def range.");
- assert(BR[LI->reg].Dead == 0 &&
- "Can't have def and dead def of same reg in a bundle.");
- LR->end = LastUse.getDeadSlot();
- BR[LI->reg].Dead = BR[LI->reg].Def;
- BR[LI->reg].Def = 0;
- }
- }
-
- void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- if (NewIdx > LR->end) {
- // Range extended to bundle. Add to bundle uses.
- // Note: Currently adds kill flags to bundle start.
- assert(BR[LI->reg].Use == 0 &&
- "Bundle already has use range for reg.");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- LR->end = NewIdx.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- assert(BR[LI->reg].Use != 0 &&
- "Bundle should already have a use range for reg.");
- }
- }
-
- void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
- BundleRanges& BR) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFromInto(OldIdx, *EI, BR);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFromInto(OldIdx, *EI, BR);
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
}
- }
- void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- // TODO: Sane rules for moving ranges into bundles.
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
- BundleRanges& BR) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFromInto(OldIdx, *II, BR);
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+ "Cannot move regmask instruction below another call");
}
- void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
-
- assert(LR->start.isRegister() &&
- "Don't know how to merge exiting ECs into bundles yet.");
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg) {
+ SlotIndex LastUse = NewIdx;
- if (LR->end > NewIdx.getDeadSlot()) {
- // This range is becoming an exiting range on the bundle.
- // If there was an old dead-def of this reg, delete it.
- if (BR[LI->reg].Dead != 0) {
- LI->removeRange(*BR[LI->reg].Dead);
- BR[LI->reg].Dead = 0;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
}
- assert(BR[LI->reg].Def == 0 &&
- "Can't have two defs for the same variable exiting a bundle.");
- LR->start = NewIdx.getRegSlot();
- LR->valno->def = LR->start;
- BR[LI->reg].Def = LR;
} else {
- // This range is becoming internal to the bundle.
- assert(LR->end == NewIdx.getRegSlot() &&
- "Can't bundle def whose kill is before the bundle");
- if (BR[LI->reg].Dead || BR[LI->reg].Def) {
- // Already have a def for this. Just delete range.
- LI->removeRange(*LR);
- } else {
- // Make range dead, record.
- LR->end = NewIdx.getDeadSlot();
- BR[LI->reg].Dead = LR;
- assert(BR[LI->reg].Use == LR &&
- "Range becoming dead should currently be use.");
+ MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
+ MachineBasicBlock::iterator MII(MI);
+ ++MII;
+ MachineBasicBlock* MBB = MI->getParent();
+ for (; MII != MBB->end(); ++MII){
+ if (MII->isDebugValue())
+ continue;
+ if (LIS.getInstructionIndex(MII) < OldIdx)
+ break;
+ for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
+ MOE = MII->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& mop = *MOI;
+ if (!mop.isReg() || mop.getReg() == 0 ||
+ TargetRegisterInfo::isVirtualRegister(mop.getReg()))
+ continue;
+
+ if (TRI.hasRegUnit(mop.getReg(), Reg))
+ LastUse = LIS.getInstructionIndex(MII);
+ }
}
- // In both cases the range is no longer a use on the bundle.
- BR[LI->reg].Use = 0;
}
+ return LastUse;
}
-
- void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
- BundleRanges& BR) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFromInto(OldIdx, *EI, BR);
- }
-
};
-void LiveIntervals::handleMove(MachineInstr* MI) {
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
- SlotIndex NewIndex = MI->isInsideBundle() ?
- Indexes->getInstructionIndex(MI) :
- Indexes->insertMachineInstrInMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
- assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesFrom(MI, OldIndex);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
- MachineInstr* BundleStart) {
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesInto(MI, BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
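
The rewritten handleMove above repairs live intervals only after the instruction has already been physically moved within its block. A minimal caller sketch, assuming a scheduler-like pass with LiveIntervals in scope; moveAndRepair and InsertPt are illustrative names, not part of this patch:

    // Move MI before InsertPt within the same basic block, then let
    // LiveIntervals repair the affected ranges. Cross-block moves are
    // rejected by the assert in handleMove.
    void moveAndRepair(llvm::LiveIntervals &LIS, llvm::MachineInstr *MI,
                       llvm::MachineBasicBlock::iterator InsertPt) {
      llvm::MachineBasicBlock *MBB = MI->getParent();
      MBB->splice(InsertPt, MBB, MI);
      LIS.handleMove(MI, /*UpdateFlags=*/false);
    }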
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index dadd02b..d5a81a3 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,13 +14,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineLoopRanges.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#include <algorithm>
using namespace llvm;
@@ -182,33 +180,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
return InterferingVRegs.size();
}
-bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
- // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
- // overlaps.
- IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
- Overlaps(LiveUnion->getMap(), Loop->getMap());
- if (!Overlaps.valid())
- return false;
-
- // The loop is overlapping an LIU assignment. Check VirtReg as well.
- LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
-
- for (;;) {
- if (VRI == VirtReg->end())
- return false;
- if (VRI->start < Overlaps.stop())
- return true;
-
- Overlaps.advanceTo(VRI->start);
- if (!Overlaps.valid())
- return false;
- if (Overlaps.start() < VRI->end)
- return true;
-
- VRI = VirtReg->advanceTo(VRI, Overlaps.start());
- }
-}
-
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
unsigned NSize) {
// Reuse existing allocation.
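
The deleted checkLoopInterference walked two sorted interval sequences in lockstep via IntervalMapOverlaps. A self-contained sketch of that coordinated scan, using plain vectors of sorted half-open ranges instead of LLVM's IntervalMap:

    #include <utility>
    #include <vector>

    // Two-pointer overlap scan: advance whichever range ends first and
    // report the first intersecting pair. Both inputs must be sorted by
    // start and non-overlapping internally.
    using Range = std::pair<int, int>; // [start, end)

    static bool anyOverlap(const std::vector<Range> &A,
                           const std::vector<Range> &B) {
      size_t i = 0, j = 0;
      while (i < A.size() && j < B.size()) {
        if (A[i].first < B[j].second && B[j].first < A[i].second)
          return true;                      // ranges intersect
        if (A[i].second < B[j].second)
          ++i;                              // A[i] ends first, advance it
        else
          ++j;
      }
      return false;
    }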
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
deleted file mode 100644
index cd4e690..0000000
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ /dev/null
@@ -1,210 +0,0 @@
-//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// LiveIntervalUnion is a union of live segments across multiple live virtual
-// registers. This may be used during coalescing to represent a congruence
-// class, or during register allocation to model liveness of a physical
-// register.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
-#define LLVM_CODEGEN_LIVEINTERVALUNION
-
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/CodeGen/LiveInterval.h"
-
-namespace llvm {
-
-class MachineLoopRange;
-class TargetRegisterInfo;
-
-#ifndef NDEBUG
-// forward declaration
-template <unsigned Element> class SparseBitVector;
-typedef SparseBitVector<128> LiveVirtRegBitSet;
-#endif
-
-/// Compare a live virtual register segment to a LiveIntervalUnion segment.
-inline bool
-overlap(const LiveRange &VRSeg,
- const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
- return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
-}
-
-/// Union of live intervals that are strong candidates for coalescing into a
-/// single register (either physical or virtual depending on the context). We
-/// expect the constituent live intervals to be disjoint, although we may
-/// eventually make exceptions to handle value-based interference.
-class LiveIntervalUnion {
- // A set of live virtual register segments that supports fast insertion,
- // intersection, and removal.
- // Mapping SlotIndex intervals to virtual register numbers.
- typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
-
-public:
- // SegmentIter can advance to the next segment ordered by starting position
- // which may belong to a different live virtual register. We also must be able
- // to reach the current segment's containing virtual register.
- typedef LiveSegments::iterator SegmentIter;
-
- // LiveIntervalUnions share an external allocator.
- typedef LiveSegments::Allocator Allocator;
-
- class Query;
-
-private:
- unsigned Tag; // unique tag for current contents.
- LiveSegments Segments; // union of virtual reg segments
-
-public:
- explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {}
-
- // Iterate over all segments in the union of live virtual registers ordered
- // by their starting position.
- SegmentIter begin() { return Segments.begin(); }
- SegmentIter end() { return Segments.end(); }
- SegmentIter find(SlotIndex x) { return Segments.find(x); }
- bool empty() const { return Segments.empty(); }
- SlotIndex startIndex() const { return Segments.start(); }
-
- // Provide public access to the underlying map to allow overlap iteration.
- typedef LiveSegments Map;
- const Map &getMap() { return Segments; }
-
- /// getTag - Return an opaque tag representing the current state of the union.
- unsigned getTag() const { return Tag; }
-
- /// changedSince - Return true if the union changed since getTag returned tag.
- bool changedSince(unsigned tag) const { return tag != Tag; }
-
- // Add a live virtual register to this union and merge its segments.
- void unify(LiveInterval &VirtReg);
-
- // Remove a live virtual register's segments from this union.
- void extract(LiveInterval &VirtReg);
-
- // Remove all inserted virtual registers.
- void clear() { Segments.clear(); ++Tag; }
-
- // Print union, using TRI to translate register names
- void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
-
-#ifndef NDEBUG
- // Verify the live intervals in this union and add them to the visited set.
- void verify(LiveVirtRegBitSet& VisitedVRegs);
-#endif
-
- /// Query interferences between a single live virtual register and a live
- /// interval union.
- class Query {
- LiveIntervalUnion *LiveUnion;
- LiveInterval *VirtReg;
- LiveInterval::iterator VirtRegI; // current position in VirtReg
- SegmentIter LiveUnionI; // current position in LiveUnion
- SmallVector<LiveInterval*,4> InterferingVRegs;
- bool CheckedFirstInterference;
- bool SeenAllInterferences;
- bool SeenUnspillableVReg;
- unsigned Tag, UserTag;
-
- public:
- Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
-
- Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
- LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
- SeenAllInterferences(false), SeenUnspillableVReg(false)
- {}
-
- void clear() {
- LiveUnion = NULL;
- VirtReg = NULL;
- InterferingVRegs.clear();
- CheckedFirstInterference = false;
- SeenAllInterferences = false;
- SeenUnspillableVReg = false;
- Tag = 0;
- UserTag = 0;
- }
-
- void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
- assert(VReg && LIU && "Invalid arguments");
- if (UserTag == UTag && VirtReg == VReg &&
- LiveUnion == LIU && !LIU->changedSince(Tag)) {
- // Retain cached results, e.g. firstInterference.
- return;
- }
- clear();
- LiveUnion = LIU;
- VirtReg = VReg;
- Tag = LIU->getTag();
- UserTag = UTag;
- }
-
- LiveInterval &virtReg() const {
- assert(VirtReg && "uninitialized");
- return *VirtReg;
- }
-
- // Does this live virtual register interfere with the union?
- bool checkInterference() { return collectInterferingVRegs(1); }
-
- // Count the virtual registers in this union that interfere with this
- // query's live virtual register, up to maxInterferingRegs.
- unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
-
- // Was this virtual register visited during collectInterferingVRegs?
- bool isSeenInterference(LiveInterval *VReg) const;
-
- // Did collectInterferingVRegs collect all interferences?
- bool seenAllInterferences() const { return SeenAllInterferences; }
-
- // Did collectInterferingVRegs encounter an unspillable vreg?
- bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
-
- // Vector generated by collectInterferingVRegs.
- const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
- return InterferingVRegs;
- }
-
- /// checkLoopInterference - Return true if there is interference overlapping
- /// Loop.
- bool checkLoopInterference(MachineLoopRange*);
-
- private:
- Query(const Query&); // DO NOT IMPLEMENT
- void operator=(const Query&); // DO NOT IMPLEMENT
- };
-
- // Array of LiveIntervalUnions.
- class Array {
- unsigned Size;
- LiveIntervalUnion *LIUs;
- public:
- Array() : Size(0), LIUs(0) {}
- ~Array() { clear(); }
-
- // Initialize the array to have Size entries.
- // Reuse an existing allocation if the size matches.
- void init(LiveIntervalUnion::Allocator&, unsigned Size);
-
- unsigned size() const { return Size; }
-
- void clear();
-
- LiveIntervalUnion& operator[](unsigned idx) {
- assert(idx < Size && "idx out of bounds");
- return LIUs[idx];
- }
- };
-};
-
-} // end namespace llvm
-
-#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
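
The Query class in the header deleted above caches interference results and invalidates them with an opaque tag that the union bumps on every mutation. A self-contained sketch of that invalidation scheme; all names are illustrative and the expensive check is a stub:

    struct IntervalUnion {
      unsigned Tag = 0;
      void modify() { ++Tag; }   // any mutation bumps the tag
      bool changedSince(unsigned T) const { return T != Tag; }
    };

    struct CachedQuery {
      const IntervalUnion *U = nullptr;
      unsigned Tag = 0;
      bool HaveResult = false;
      bool CachedAnswer = false;

      bool interferes(const IntervalUnion &Un) {
        // Reuse the cached answer while the union is unchanged.
        if (U == &Un && HaveResult && !Un.changedSince(Tag))
          return CachedAnswer;
        U = &Un;
        Tag = Un.Tag;
        CachedAnswer = expensiveCheck(Un); // recompute and re-cache
        HaveResult = true;
        return CachedAnswer;
      }
      static bool expensiveCheck(const IntervalUnion &) { return false; }
    };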
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b4ce9aa..3b28e6a 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
continue;
return false;
}
@@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
if (OVNI != li.getVNInfoAt(UseIdx))
return false;
}
@@ -249,7 +256,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
unsigned Reg = MOI->getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
continue;
}
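
The new SlotIndex::isSameInstr guard closes PR14098: if OrigMI redefines the register it is being rematerialized from, a use at OrigMI's own index would observe the pre-redefinition value. A simplified standalone model of the availability test, with value numbers kept in a plain map instead of live ranges (a sketch, not the pass's real data structures):

    #include <map>

    // A use of a register at UseIdx can be rematerialized from the def at
    // OrigIdx only if both indices see the same value number, and UseIdx is
    // not the defining instruction itself (the PR14098 case).
    static bool availableAt(const std::map<int, int> &ValNoAt, // idx -> valno
                            int OrigIdx, int UseIdx) {
      if (OrigIdx == UseIdx)
        return false;
      auto O = ValNoAt.find(OrigIdx), U = ValNoAt.find(UseIdx);
      return O != ValNoAt.end() && U != ValNoAt.end() &&
             O->second == U->second;
    }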
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7f22478..0ef069f 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h
deleted file mode 100644
index b3e2d7f..0000000
--- a/lib/CodeGen/LiveRegMatrix.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRegMatrix analysis pass keeps track of virtual register interference
-// along two dimensions: Slot indexes and register units. The matrix is used by
-// register allocators to ensure that no interfering virtual registers get
-// assigned to overlapping physical registers.
-//
-// Register units are defined in MCRegisterInfo.h; they represent the smallest
-// unit of interference when dealing with overlapping physical registers. The
-// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
-// a virtual register is assigned to a physical register, the live range for
-// the virtual register is inserted into the LiveIntervalUnion for each regunit
-// in the physreg.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
-#define LLVM_CODEGEN_LIVEREGMATRIX_H
-
-#include "LiveIntervalUnion.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-namespace llvm {
-
-class LiveInterval;
-class LiveIntervalAnalysis;
-class MachineRegisterInfo;
-class TargetRegisterInfo;
-class VirtRegMap;
-
-class LiveRegMatrix : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- LiveIntervals *LIS;
- VirtRegMap *VRM;
-
- // UserTag changes whenever virtual registers have been modified.
- unsigned UserTag;
-
- // The matrix is represented as a LiveIntervalUnion per register unit.
- LiveIntervalUnion::Allocator LIUAlloc;
- LiveIntervalUnion::Array Matrix;
-
- // Cached queries per register unit.
- OwningArrayPtr<LiveIntervalUnion::Query> Queries;
-
- // Cached register mask interference info.
- unsigned RegMaskTag;
- unsigned RegMaskVirtReg;
- BitVector RegMaskUsable;
-
- // MachineFunctionPass boilerplate.
- virtual void getAnalysisUsage(AnalysisUsage&) const;
- virtual bool runOnMachineFunction(MachineFunction&);
- virtual void releaseMemory();
-public:
- static char ID;
- LiveRegMatrix();
-
- //===--------------------------------------------------------------------===//
- // High-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Check for interference before assigning virtual registers to physical
- // registers.
- //
-
- /// Invalidate cached interference queries after modifying virtual register
- /// live ranges. Interference checks may return stale information unless
- /// caches are invalidated.
- void invalidateVirtRegs() { ++UserTag; }
-
- enum InterferenceKind {
- /// No interference, go ahead and assign.
- IK_Free = 0,
-
- /// Virtual register interference. There are interfering virtual registers
- /// assigned to PhysReg or its aliases. This interference could be resolved
- /// by unassigning those other virtual registers.
- IK_VirtReg,
-
- /// Register unit interference. A fixed live range is in the way, typically
- /// argument registers for a call. This can't be resolved by unassigning
- /// other virtual registers.
- IK_RegUnit,
-
- /// RegMask interference. The live range is crossing an instruction with a
- /// regmask operand that doesn't preserve PhysReg. This typically means
- /// VirtReg is live across a call, and PhysReg isn't call-preserved.
- IK_RegMask
- };
-
- /// Check for interference before assigning VirtReg to PhysReg.
- /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
- /// When there is more than one kind of interference, the InterferenceKind
- /// with the highest enum value is returned.
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Assign VirtReg to PhysReg.
- /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
- /// update VirtRegMap. The live range is expected to be available in PhysReg.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Unassign VirtReg from its PhysReg.
- /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
- /// the assignment and updates VirtRegMap accordingly.
- void unassign(LiveInterval &VirtReg);
-
- //===--------------------------------------------------------------------===//
- // Low-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Provide access to the underlying LiveIntervalUnions.
- //
-
- /// Check for regmask interference only.
- /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
- /// If PhysReg is null, check if VirtReg crosses any regmask operands.
- bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
-
- /// Check for regunit interference only.
- /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
- /// register units.
- bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Query a line of the assigned virtual register matrix directly.
- /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
- /// This returns a reference to an internal Query data structure that is only
- /// valid until the next query() call.
- LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
-
- /// Directly access the live interval unions per regunit.
- /// This returns an array indexed by the regunit number.
- LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_LIVEREGMATRIX_H
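
The header above (now relocated under include/llvm/CodeGen) documents the allocator-facing interface. A hedged sketch of how an allocator might drive it; tryAssign is not a function from this patch, and eviction and error handling are elided:

    bool tryAssign(llvm::LiveRegMatrix &Matrix, llvm::LiveInterval &VirtReg,
                   unsigned PhysReg) {
      switch (Matrix.checkInterference(VirtReg, PhysReg)) {
      case llvm::LiveRegMatrix::IK_Free:
        Matrix.assign(VirtReg, PhysReg); // marks regunits, updates VirtRegMap
        return true;
      case llvm::LiveRegMatrix::IK_VirtReg:
        return false;                    // could evict instead; omitted here
      default:
        return false;                    // fixed ranges or regmask clobbers
      }
    }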
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 939e795..be11a8f 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -15,17 +15,20 @@
#define DEBUG_TYPE "livestacks"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <limits>
using namespace llvm;
char LiveStacks::ID = 0;
-INITIALIZE_PASS(LiveStacks, "livestacks",
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
"Live Stack Slot Analysis", false, false)
char &llvm::LiveStacksID = LiveStacks::ID;
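
The switch to the three-macro form lets LiveStacks declare SlotIndexes as an initialization-time dependency. The generic shape of that registration pattern, paired with the getAnalysisUsage override it usually accompanies; MyPass is a placeholder, not code from this commit:

    INITIALIZE_PASS_BEGIN(MyPass, "mypass", "My Pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    INITIALIZE_PASS_END(MyPass, "mypass", "My Pass", false, false)

    void MyPass::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
      AU.addRequired<llvm::SlotIndexes>();
      llvm::MachineFunctionPass::getAnalysisUsage(AU);
    }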
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index f294124..f81ad1c 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -27,17 +27,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -65,7 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
}
void LiveVariables::VarInfo::dump() const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
@@ -503,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
TRI = MF->getTarget().getRegisterInfo();
- ReservedRegisters = TRI->getReservedRegs(mf);
-
unsigned NumRegs = TRI->getNumRegs();
PhysRegDef = new MachineInstr*[NumRegs];
PhysRegUse = new MachineInstr*[NumRegs];
@@ -588,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = UseRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
}
@@ -601,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = DefRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, MI, Defs);
}
UpdatePhysRegDefs(MI, Defs);
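
This hunk is part of a commit-wide migration from per-pass BitVector snapshots of the reserved set to a single query on MachineRegisterInfo. The before/after idiom, sketched:

    // Before: each pass cached its own snapshot at function entry.
    //   BitVector ReservedRegisters = TRI->getReservedRegs(mf);
    //   if (!ReservedRegisters[MOReg])
    //     HandlePhysRegUse(MOReg, MI);
    // After: one central, always-current answer owned by MRI.
    //   if (!MRI->isReserved(MOReg))
    //     HandlePhysRegUse(MOReg, MI);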
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index fbc9e20..352ef94 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -15,26 +15,26 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "localstackalloc"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 9250577..7e9fb20 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -12,24 +12,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/BasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -145,7 +145,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
instr_iterator I = instr_begin(), E = instr_end();
while (I != E && I->isPHI())
++I;
- assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
return I;
}
@@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
- assert(!I->isInsideBundle() &&
+ assert((I == E || !I->isInsideBundle()) &&
"First non-phi / non-label instruction is inside a bundle!");
return I;
}
@@ -228,7 +229,7 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
return 0;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineBasicBlock::dump() const {
print(dbgs());
}
@@ -787,40 +788,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
-MachineBasicBlock::iterator
-MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
- if (I->isBundle()) {
- MachineBasicBlock::iterator E = llvm::next(I);
- return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
- }
-
- return Insts.erase(I.getInstrIterator());
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
}
-MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
- if (I->isBundle()) {
- instr_iterator MII = llvm::next(I);
- iterator E = end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII++;
- Insts.remove(MI);
- }
- }
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(I);
+ return Insts.erase(I);
+}
- return Insts.remove(I);
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
}
-void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
- MachineBasicBlock *Other,
- MachineBasicBlock::iterator From) {
- if (From->isBundle()) {
- MachineBasicBlock::iterator To = llvm::next(From);
- Insts.splice(where.getInstrIterator(), Other->Insts,
- From.getInstrIterator(), To.getInstrIterator());
- return;
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
}
-
- Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+ return Insts.insert(I, MI);
}
/// removeFromParent - This method unlinks 'this' from the containing function,
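
The bundle-aware primitives above divide the work: erase fixes the neighbors' bundle flags and deletes the instruction, while remove_instr additionally clears the instruction's own flags so it can be reinserted elsewhere. A usage sketch, assuming MI, MI2, MBB, SrcMBB, and DstMBB are in scope:

    // Delete an instruction that may sit inside a bundle:
    MBB->erase(llvm::MachineBasicBlock::instr_iterator(MI));

    // Or detach one for reinsertion; insert() re-derives bundle flags
    // from the neighbors at the destination:
    llvm::MachineInstr *Free = SrcMBB->remove_instr(MI2);
    DstMBB->insert(DstMBB->instr_begin(), Free);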
@@ -971,6 +974,83 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
return Weights.begin() + index;
}
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// The search is limited to a neighborhood of Neighborhood instructions
+/// before MI (looking for defs or kills) and Neighborhood instructions
+/// after MI (looking for defs only).
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ unsigned Reg, MachineInstr *MI,
+ unsigned Neighborhood) {
+ unsigned N = Neighborhood;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Start by searching backwards from MI, looking for kills, reads or defs.
+
+ MachineBasicBlock::iterator I(MI);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != MBB->begin()) {
+ do {
+ --I;
+
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.Defines)
+ // Outputs happen after inputs so they take precedence if both are
+ // present.
+ return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+ if (Analysis.Kills || Analysis.Clobbers)
+ // Register killed, so isn't live.
+ return LQR_Dead;
+
+ else if (Analysis.ReadsOverlap)
+        // Read without a previous kill - live.
+ return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
+
+ } while (I != MBB->begin() && --N > 0);
+ }
+
+ // Did we get to the start of the block?
+ if (I == MBB->begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+ RAI.isValid(); ++RAI) {
+ if (MBB->isLiveIn(*RAI))
+ return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+ }
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Try searching forwards from MI, looking for reads or defs.
+ I = MachineBasicBlock::iterator(MI);
+ // If this is the last insn in the block, don't search forwards.
+ if (I != MBB->end()) {
+ for (++I; I != MBB->end() && N > 0; ++I, --N) {
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.ReadsOverlap)
+ // Used, therefore must have been live.
+ return (Analysis.Reads) ?
+ LQR_Live : LQR_OverlappingLive;
+
+ else if (Analysis.Clobbers || Analysis.Defines)
+ // Defined (but not read) therefore cannot have been live.
+ return LQR_Dead;
+ }
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index a079d6e..070daf2 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index c4dca2c..07a3e03 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -33,13 +38,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
@@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain(
assert(BB);
assert(BlockToChain[BB] == &Chain);
assert(*llvm::prior(Chain.end()) == BB);
- MachineBasicBlock *BestSucc = 0;
// Look for the best viable successor if there is one to place immediately
// after this block.
- BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+ MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -1014,7 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (F.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
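
The attribute-query migration visible here recurs across the commit: the old hasFnAttr predicate becomes an explicit lookup on the function's AttributeSet, as sketched below for the OptimizeForSize case:

    // Before:
    //   if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) ...
    // After:
    bool OptSize = F.getFunction()->getAttributes()
                       .hasAttribute(llvm::AttributeSet::FunctionIndex,
                                     llvm::Attribute::OptimizeForSize);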
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 4479211..ae70912 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 896461f..61d8d38 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -15,17 +15,17 @@
#define DEBUG_TYPE "machine-cse"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -63,8 +63,6 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
- AllocatableRegs.clear();
- ReservedRegs.clear();
}
private:
@@ -78,8 +76,6 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- BitVector AllocatableRegs;
- BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -88,7 +84,8 @@ namespace {
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const;
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
SmallSet<unsigned,8> &PhysRefs,
SmallVector<unsigned,2> &PhysDefs,
@@ -129,8 +126,6 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
// deleted.
continue;
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (DefMI->getParent() != MBB)
- continue;
if (!DefMI->isCopy())
continue;
unsigned SrcReg = DefMI->getOperand(1).getReg();
@@ -198,31 +193,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const{
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const {
+ // First, add all uses to PhysRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
+ if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- // If the def is dead, it's ok. But the def may not be marked "dead". That's
- // common since this pass is run before livevariables. We can scan
- // forward a few instructions and check if it is obviously dead.
- if (MO.isDef() &&
- (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
- continue;
// Reading constant physregs is ok.
if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
- if (MO.isDef())
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before LiveVariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
PhysDefs.push_back(Reg);
}
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
return !PhysRefs.empty();
}
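
A self-contained sketch of the two-phase scan introduced above: record every use first, then flag any def that also appears as a use (the PhysUseDef case that rules out CSE). Alias expansion is omitted for brevity:

    #include <set>
    #include <vector>

    static bool usesAndDefinesSameReg(const std::vector<unsigned> &Uses,
                                      const std::vector<unsigned> &Defs) {
      // Phase 1: collect the used registers.
      std::set<unsigned> UseSet(Uses.begin(), Uses.end());
      // Phase 2: a def already seen as a use means use-and-def overlap.
      for (size_t i = 0; i != Defs.size(); ++i)
        if (UseSet.count(Defs[i]))
          return true;
      return false;
    }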
@@ -242,7 +258,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
// Avoid extending live range of physical registers if they are
// allocatable or reserved.
return false;
@@ -411,8 +427,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
assert(SI != ScopeMap.end());
- ScopeMap.erase(SI);
delete SI->second;
+ ScopeMap.erase(SI);
}
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
@@ -463,16 +479,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool CrossMBBPhysDef = false;
SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
// and it also defines the physical register which is not clobbered
// in between and the physical register uses were not clobbered.
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
- FoundCSE = true;
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
}
if (!FoundCSE) {
@@ -635,7 +657,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
- AllocatableRegs = TRI->getAllocatableSet(MF);
- ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index bac3aa2..dc8a224 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -13,18 +13,19 @@
#define DEBUG_TYPE "codegen-cp"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
@@ -32,7 +33,8 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
- BitVector ReservedRegs;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification, replacement for typeid
@@ -50,6 +52,7 @@ namespace {
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ void removeCopy(MachineInstr *MI);
};
}
char MachineCopyPropagation::ID = 0;
@@ -123,6 +126,16 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
return false;
}
+// Remove MI from the function because it has been determined to be dead.
+// Turn it into a noop KILL instruction if it has super-register liveness
+// adjustments.
+void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
+ if (MI->getNumOperands() == 2)
+ MI->eraseFromParent();
+ else
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+}
+
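
Why removeCopy sometimes keeps a KILL rather than erasing: a COPY with more than the two ordinary operands carries implicit operands that adjust super-register liveness. A hypothetical ARM-flavored example:

    //   %D0<def> = COPY %D1, %Q0<imp-use,kill>    ; extra implicit operand
    // Erasing the instruction would drop the <kill> of %Q0; rewriting it to
    //   KILL %D1, %Q0<imp-use,kill>
    // removes the data movement but preserves the liveness bookkeeping.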
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
@@ -146,8 +159,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
if (CI != AvailCopyMap.end()) {
MachineInstr *CopyMI = CI->second;
- if (!ReservedRegs.test(Def) &&
- (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
isNopCopy(CopyMI, Def, Src, TRI)) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -168,7 +181,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
I->clearRegisterKills(Def, TRI);
- MI->eraseFromParent();
+ removeCopy(MI);
Changed = true;
++NumDeletes;
continue;
@@ -259,9 +272,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
unsigned Reg = (*DI)->getOperand(0).getReg();
- if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
continue;
- (*DI)->eraseFromParent();
+ removeCopy(*DI);
Changed = true;
++NumDeletes;
}
@@ -296,8 +309,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (SmallSetVector<MachineInstr*, 8>::iterator
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
- if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
- (*DI)->eraseFromParent();
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
+ removeCopy(*DI);
Changed = true;
++NumDeletes;
}
@@ -311,7 +324,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TRI = MF.getTarget().getRegisterInfo();
- ReservedRegs = TRI->getReservedRegs(MF);
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Changed |= CopyPropagateBlock(*I);
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index c282332..9647e83 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -14,28 +14,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -58,14 +58,17 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
else
RegInfo = 0;
MFInfo = 0;
- FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
- if (Fn->hasFnAttr(Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
- Fn->getAttributes().getFnAttributes()));
- ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(),
+ TM.Options.RealignStack);
+ if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+ getStackAlignment(AttributeSet::FunctionIndex));
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
@@ -73,8 +76,15 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
}
MachineFunction::~MachineFunction() {
- BasicBlocks.clear();
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+ // Do call MachineBasicBlock destructors; they contain std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+
InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
@@ -157,7 +167,7 @@ MachineInstr *
MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL, bool NoImp) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(MCID, DL, NoImp);
+ MachineInstr(*this, MCID, DL, NoImp);
}
/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
@@ -172,9 +182,17 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
/// DeleteMachineInstr - Delete the given MachineInstr.
///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
void
MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
- MI->~MachineInstr();
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+ // ~MachineFunction drops whole lists of MachineInstrs without calling their
+ // destructors.
InstructionRecycler.Deallocate(Allocator, MI);
}
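
The purge-don't-destroy arrangement described above relies on recyclable, trivially destructible objects. A self-contained sketch of the free-list idea; LLVM's actual Recycler and BumpPtrAllocator differ in detail:

    #include <new>
    #include <vector>

    // Freed objects go on a free list and are reused without running
    // destructors, so T must be trivially destructible. Outstanding memory
    // is reclaimed wholesale when the owning arena dies (omitted here).
    template <typename T> class FreeListRecycler {
      std::vector<void *> FreeList;
    public:
      void *allocate() {
        if (FreeList.empty())
          return ::operator new(sizeof(T));
        void *P = FreeList.back();
        FreeList.pop_back();
        return P;
      }
      void deallocate(void *P) { FreeList.push_back(P); } // no destructor call
    };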
@@ -284,7 +302,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
return std::make_pair(Result, Result + Num);
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFunction::dump() const {
print(dbgs());
}
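
The repeated switch from #ifndef NDEBUG to this two-macro guard follows one pattern across the patch: dump() stays available in asserts builds and can be opted back into release builds. The shape, with a hypothetical MyType standing in for the classes touched here:

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MyType::dump() const { print(dbgs()); } // compiled out of plain release builds
#endif
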
@@ -445,6 +463,70 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
+/// ensureMaxAlignment - Make sure the function is at least Align bytes
+/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!TFI.isStackRealignable() || !RealignOption)
+ assert(Align <= TFI.getStackAlignment() &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
+
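
To make the clamping contract concrete, a worked example with invented numbers (16-byte target stack alignment, realignment disabled):

// Illustrative only -- exercises the static helper above with made-up values.
const unsigned StackAlign = 16;  // stands in for TFI.getStackAlignment()
const bool NoRealign = true;     // !TFI.isStackRealignable() || !RealignOption
assert(clampStackAlignment(NoRealign, 64, StackAlign) == 16); // clamped, warns in debug
assert(clampStackAlignment(NoRealign,  8, StackAlign) ==  8); // within limit, untouched
assert(clampStackAlignment(false,     64, StackAlign) == 64); // realignment on: honored
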
+/// CreateStackObject - Create a new statically sized stack object, returning
+/// a nonnegative identifier to represent it.
+///
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
+ Alloca));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateSpillStackObject - Create a new statically sized stack object that
+/// represents a spill slot, returning a nonnegative identifier to represent
+/// it.
+///
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ CreateStackObject(Size, Alignment, true, false);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+/// variable sized object has been created. This must be created whenever a
+/// variable sized object is created, whether or not the index returned is
+/// actually used.
+///
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
/// CreateFixedObject - Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are immutable. This returns an
@@ -459,6 +541,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned.
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Align, TFI.getStackAlignment());
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
/*NeedSP*/ false,
@@ -534,7 +618,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFrameInfo::dump(const MachineFunction &MF) const {
print(MF, dbgs());
}
@@ -545,7 +629,7 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const {
//===----------------------------------------------------------------------===//
/// getEntrySize - Return the size of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// The size of a jump table entry is 4 bytes unless the entry is just the
// address of a block, in which case it is the pointer size.
switch (getEntryKind()) {
@@ -564,7 +648,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// The alignment of a jump table entry is the alignment of int32 unless the
// entry is just the address of a block, in which case it is the pointer
// alignment.
@@ -633,7 +717,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << '\n';
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineJumpTableInfo::dump() const { print(dbgs()); }
#endif
@@ -670,7 +754,7 @@ MachineConstantPool::~MachineConstantPool() {
/// CanShareConstantPoolEntry - Test whether the given two constants
/// can be allocated the same constant pool entry.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
- const TargetData *TD) {
+ const DataLayout *TD) {
// Handle the trivial case quickly.
if (A == B) return true;
@@ -694,7 +778,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// Try constant folding a bitcast of both instructions to an integer. If we
// get two identical ConstantInt's, then we are good to share them. We use
// the constant folding APIs to do this so that we get the benefit of
- // TargetData.
+ // DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
const_cast<Constant*>(A), TD);
@@ -768,6 +852,6 @@ void MachineConstantPool::print(raw_ostream &OS) const {
}
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineConstantPool::dump() const { print(dbgs()); }
#endif
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index e5a4912..674cc80 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0102ac7..fa9c821 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
char MachineFunctionPrinterPass::ID = 0;
}
-char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
"Machine Function Printer", false, false)
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 0508b9f..29d8866 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -12,15 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Value.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,19 +22,24 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -144,7 +143,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
// Change this to a register and set the reg#.
OpKind = MO_Register;
SmallContents.RegNo = Reg;
- SubReg = 0;
+ SubReg_TargetFlags = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -198,7 +197,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return !strcmp(getSymbolName(), Other.getSymbolName()) &&
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
- return getBlockAddress() == Other.getBlockAddress();
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
case MO_RegisterMask:
return getRegMask() == Other.getRegMask();
case MachineOperand::MO_MCSymbol:
@@ -239,7 +239,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
MO.getOffset());
case MachineOperand::MO_BlockAddress:
return hash_combine(MO.getType(), MO.getTargetFlags(),
- MO.getBlockAddress());
+ MO.getBlockAddress(), MO.getOffset());
case MachineOperand::MO_RegisterMask:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
case MachineOperand::MO_Metadata:
@@ -362,6 +362,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_BlockAddress:
OS << '<';
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
case MachineOperand::MO_RegisterMask:
@@ -516,118 +517,50 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
// MachineInstr Implementation
//===----------------------------------------------------------------------===//
-/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// MCID NULL and no operands.
-MachineInstr::MachineInstr()
- : MCID(0), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0),
- Parent(0) {
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
-
-void MachineInstr::addImplicitDefUseOperands() {
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
- addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+ addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
}
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
-MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0) {
- unsigned NumImplicitOps = 0;
- if (!NoImp)
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- if (!NoImp)
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+ const DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() +
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
-/// MachineInstr ctor - As above, but with a DebugLoc.
-MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
- bool NoImp)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
- unsigned NumImplicitOps = 0;
if (!NoImp)
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- if (!NoImp)
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
-
-/// MachineInstr ctor - Work exactly the same as the ctor two above, except
-/// that the MachineInstr is created and added to the end of the specified
-/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0) {
- assert(MBB && "Cannot use inserting ctor with null basic block!");
- unsigned NumImplicitOps =
- MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
- MBB->push_back(this); // Add instruction to end of basic block!
-}
-
-/// MachineInstr ctor - As above, but with a DebugLoc.
-///
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const MCInstrDesc &tid)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
- assert(MBB && "Cannot use inserting ctor with null basic block!");
- unsigned NumImplicitOps =
- MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
- MBB->push_back(this); // Add instruction to end of basic block!
+ addImplicitDefUseOperands(MF);
}
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
+ : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- Parent(0), debugLoc(MI.getDebugLoc()) {
- Operands.reserve(MI.getNumOperands());
+ debugLoc(MI.getDebugLoc()) {
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
- // Add operands
+ // Copy operands.
for (unsigned i = 0; i != MI.getNumOperands(); ++i)
- addOperand(MI.getOperand(i));
-
- // Copy all the flags.
- Flags = MI.Flags;
+ addOperand(MF, MI.getOperand(i));
- // Set parent to null.
- Parent = 0;
-
- LeakDetector::addGarbageObject(this);
-}
-
-MachineInstr::~MachineInstr() {
- LeakDetector::removeGarbageObject(this);
-#ifndef NDEBUG
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
- assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
- "Reg operand def/use list corrupted");
- }
-#endif
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
}
/// getRegInfo - If this instruction is embedded into a MachineFunction,
@@ -643,7 +576,7 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
MRI.removeRegOperandFromUseList(&Operands[i]);
}
@@ -652,40 +585,65 @@ void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
MRI.addRegOperandToUseList(&Operands[i]);
}
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+
+  // Here it would be convenient to call memmove, but that isn't allowed because
+ // MachineOperand has a constructor and so isn't a POD type.
+ if (Dst < Src)
+ for (unsigned i = 0; i != NumOps; ++i)
+ new (Dst + i) MachineOperand(Src[i]);
+ else
+    for (unsigned i = NumOps; i; --i)
+ new (Dst + i - 1) MachineOperand(Src[i - 1]);
+}
+
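
Why the copy direction above matters is easiest to see in a self-contained demonstration (plain ints standing in for MachineOperands): shifting left must walk forward, shifting right must walk backward, or elements are overwritten before they are read.

#include <cassert>

static void moveInts(int *Dst, int *Src, unsigned N) {
  if (Dst < Src)
    for (unsigned i = 0; i != N; ++i) Dst[i] = Src[i];    // forward walk
  else
    for (unsigned i = N; i; --i) Dst[i - 1] = Src[i - 1]; // backward walk
}

int main() {
  int A[5] = {1, 2, 3, 4, 5};
  moveInts(A + 1, A, 4);  // overlapping shift right: backward walk yields {1,1,2,3,4}
  assert(A[1] == 1 && A[4] == 4);
}
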
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
/// (before the first implicit operand).
-void MachineInstr::addOperand(const MachineOperand &Op) {
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
assert(MCID && "Cannot add operands before providing an instr descriptor");
- bool isImpReg = Op.isReg() && Op.isImplicit();
- MachineRegisterInfo *RegInfo = getRegInfo();
- // If the Operands backing store is reallocated, all register operands must
- // be removed and re-added to RegInfo. It is storing pointers to operands.
- bool Reallocate = RegInfo &&
- !Operands.empty() && Operands.size() == Operands.capacity();
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
// Find the insert location for the new operand. Implicit registers go at
- // the end, everything goes before the implicit regs.
- unsigned OpNo = Operands.size();
-
- // Remove all the implicit operands from RegInfo if they need to be shifted.
+ // the end, everything else goes before the implicit regs.
+ //
// FIXME: Allow mixed explicit and implicit operands on inline asm.
// InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
// implicit-defs, but they must not be moved around. See the FIXME in
// InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
if (!isImpReg && !isInlineAsm()) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
- if (RegInfo)
- RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
@@ -696,55 +654,56 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
OpNo < MCID->getNumOperands()) &&
"Trying to add an operand to a machine instr that is already done!");
- // All operands from OpNo have been removed from RegInfo. If the Operands
- // backing store needs to be reallocated, we also need to remove any other
- // register operands.
- if (Reallocate)
- for (unsigned i = 0; i != OpNo; ++i)
- if (Operands[i].isReg())
- RegInfo->removeRegOperandFromUseList(&Operands[i]);
-
- // Insert the new operand at OpNo.
- Operands.insert(Operands.begin() + OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- // The Operands backing store has now been reallocated, so we can re-add the
- // operands before OpNo.
- if (Reallocate)
- for (unsigned i = 0; i != OpNo; ++i)
- if (Operands[i].isReg())
- RegInfo->addRegOperandToUseList(&Operands[i]);
-
- // When adding a register operand, tell RegInfo about it.
- if (Operands[OpNo].isReg()) {
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
// Ensure isOnRegUseList() returns false, regardless of Op's status.
- Operands[OpNo].Contents.Reg.Prev = 0;
+ NewMO->Contents.Reg.Prev = 0;
// Ignore existing ties. This is not a property that can be copied.
- Operands[OpNo].TiedTo = 0;
- // Add the new operand to RegInfo.
- if (RegInfo)
- RegInfo->addRegOperandToUseList(&Operands[OpNo]);
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
// The MCID operand information isn't accurate until we start adding
// explicit operands. The implicit operands are added first, then the
// explicits are inserted before them.
if (!isImpReg) {
// Tie uses to defs as indicated in MCInstrDesc.
- if (Operands[OpNo].isUse()) {
+ if (NewMO->isUse()) {
int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
if (DefIdx != -1)
tieOperands(DefIdx, OpNo);
}
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
- }
-
- // Re-add all the implicit ops.
- if (RegInfo) {
- for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].isReg() && "Should only be an implicit reg!");
- RegInfo->addRegOperandToUseList(&Operands[i]);
+ NewMO->setIsEarlyClobber(true);
}
}
}
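
Stripped of the use-list bookkeeping, the control flow of the new addOperand is a grow-then-shift insertion. A self-contained sketch (plain ints, new/delete standing in for the ArrayRecycler) of the same strategy:

#include <cstring>

struct TinyArray {
  int *Ops = nullptr;
  unsigned Num = 0, Cap = 0;

  void insertAt(unsigned OpNo, int Val) {
    int *Old = Ops;
    if (!Old || Num == Cap) {           // full: grab a larger buffer
      Cap = Cap ? Cap * 2 : 1;          // stands in for OldCap.getNext()
      Ops = new int[Cap];
      if (OpNo)                         // move the prefix into the new buffer
        std::memmove(Ops, Old, OpNo * sizeof(int));
    }
    if (OpNo != Num)                    // shift the tail one slot right
      std::memmove(Ops + OpNo + 1, Old + OpNo, (Num - OpNo) * sizeof(int));
    ++Num;
    if (Old != Ops)                     // retire the old buffer
      delete[] Old;
    Ops[OpNo] = Val;                    // construct the new element in the gap
  }
};
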
@@ -753,45 +712,27 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
/// fewer operand than it started with.
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
- assert(OpNo < Operands.size() && "Invalid operand number");
+ assert(OpNo < getNumOperands() && "Invalid operand number");
untieRegOperand(OpNo);
- MachineRegisterInfo *RegInfo = getRegInfo();
-
- // Special case removing the last one.
- if (OpNo == Operands.size()-1) {
- // If needed, remove from the reg def/use list.
- if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
- RegInfo->removeRegOperandFromUseList(&Operands.back());
-
- Operands.pop_back();
- return;
- }
-
- // Otherwise, we are removing an interior operand. If we have reginfo to
- // update, remove all operands that will be shifted down from their reg lists,
- // move everything down, then re-add them.
- if (RegInfo) {
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- if (Operands[i].isReg())
- RegInfo->removeRegOperandFromUseList(&Operands[i]);
- }
- }
#ifndef NDEBUG
// Moving tied operands would break the ties.
- for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i)
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
assert(!Operands[i].isTied() && "Cannot move tied operands");
#endif
- Operands.erase(Operands.begin()+OpNo);
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
- if (RegInfo) {
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- if (Operands[i].isReg())
- RegInfo->addRegOperandToUseList(&Operands[i]);
- }
- }
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
}
/// addMemOperand - Add a MachineMemOperand to the machine instruction.
@@ -800,16 +741,14 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- uint16_t OldNumMemRefs = NumMemRefs;
+ unsigned OldNumMemRefs = NumMemRefs;
- uint16_t NewNum = NumMemRefs + 1;
+ unsigned NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
-
- MemRefs = NewMemRefs;
- NumMemRefs = NewNum;
+ setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
@@ -892,46 +831,25 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
return true;
}
-/// removeFromParent - This method unlinks 'this' from the containing basic
-/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
-
- // If it's a bundle then remove the MIs inside the bundle as well.
- if (isBundle()) {
- MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::instr_iterator MII = *this; ++MII;
- MachineBasicBlock::instr_iterator E = MBB->instr_end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII;
- ++MII;
- MBB->remove(MI);
- }
- }
- getParent()->remove(this);
- return this;
+ return getParent()->remove(this);
}
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
-/// eraseFromParent - This method unlinks 'this' from the containing basic
-/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
- // If it's a bundle then remove the MIs inside the bundle as well.
- if (isBundle()) {
- MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::instr_iterator MII = *this; ++MII;
- MachineBasicBlock::instr_iterator E = MBB->instr_end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII;
- ++MII;
- MBB->erase(MI);
- }
- }
- // Erase the individual instruction, which may itself be inside a bundle.
- getParent()->erase_instr(this);
+ getParent()->erase(this);
}
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
/// getNumExplicitOperands - Returns the number of non-implicit operands.
///
@@ -948,14 +866,40 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
-/// isBundled - Return true if this instruction part of a bundle. This is true
-/// if either itself or its following instruction is marked "InsideBundle".
-bool MachineInstr::isBundled() const {
- if (isInsideBundle())
- return true;
- MachineBasicBlock::const_instr_iterator nextMI = this;
- ++nextMI;
- return nextMI != Parent->instr_end() && nextMI->isInsideBundle();
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+  ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
}
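
These four helpers keep BundledPred/BundledSucc consistent in pairs, so bundle membership can be derived from the flags alone. A small sketch, assuming the isBundledWithSucc() accessor the asserts above rely on: starting at the bundle header, keep advancing while the current instruction is glued to its successor.

static unsigned countBundleInstrs(MachineBasicBlock::instr_iterator I) {
  unsigned N = 1;               // count the header itself
  while (I->isBundledWithSucc()) {
    ++I;
    ++N;
  }
  return N;
}
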
bool MachineInstr::isStackAligningInlineAsm() const {
@@ -1042,9 +986,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
unsigned MachineInstr::getBundleSize() const {
assert(isBundle() && "Expecting a bundle");
- MachineBasicBlock::const_instr_iterator I = *this;
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end();
unsigned Size = 0;
- while ((++I)->isInsideBundle()) {
+ while ((++I != E) && I->isInsideBundle()) {
++Size;
}
assert(Size > 1 && "Malformed bundle");
@@ -1257,41 +1202,6 @@ void MachineInstr::clearKillInfo() {
}
}
-/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
-///
-void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
- continue;
- for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
- MachineOperand &MOp = getOperand(j);
- if (!MOp.isIdenticalTo(MO))
- continue;
- if (MO.isKill())
- MOp.setIsKill();
- else
- MOp.setIsDead();
- break;
- }
- }
-}
-
-/// copyPredicates - Copies predicate operand(s) from MI.
-void MachineInstr::copyPredicates(const MachineInstr *MI) {
- assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
-
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
- return;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (MCID.OpInfo[i].isPredicate()) {
- // Predicated operands must be last operands.
- addOperand(MI->getOperand(i));
- }
- }
-}
-
void MachineInstr::substituteRegister(unsigned FromReg,
unsigned ToReg,
unsigned SubIdx,
@@ -1486,17 +1396,18 @@ bool MachineInstr::allDefsAreDead() const {
/// copyImplicitOps - Copy implicit register operands from specified
/// instruction to this instruction.
-void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr *MI) {
for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isImplicit())
- addOperand(MO);
+ addOperand(MF, MO);
}
}
void MachineInstr::dump() const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
#endif
}
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index b7de7bf..77bcd1d 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
namespace {
@@ -47,8 +47,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
// Remove BUNDLE instruction and the InsideBundle flags from bundled
// instructions.
if (MI->isBundle()) {
- while (++MII != MIE && MII->isInsideBundle()) {
- MII->setIsInsideBundle(false);
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (MO.isReg() && MO.isInternalRead())
@@ -101,18 +101,20 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator FirstMI,
MachineBasicBlock::instr_iterator LastMI) {
assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
const TargetMachine &TM = MBB.getParent()->getTarget();
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
- SmallVector<unsigned, 8> LocalDefs;
- SmallSet<unsigned, 8> LocalDefSet;
+ SmallVector<unsigned, 32> LocalDefs;
+ SmallSet<unsigned, 32> LocalDefSet;
SmallSet<unsigned, 8> DeadDefSet;
- SmallSet<unsigned, 8> KilledDefSet;
+ SmallSet<unsigned, 16> KilledDefSet;
SmallVector<unsigned, 8> ExternUses;
SmallSet<unsigned, 8> ExternUseSet;
SmallSet<unsigned, 8> KilledUseSet;
@@ -177,11 +179,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
}
- FirstMI->setIsInsideBundle();
Defs.clear();
}
- SmallSet<unsigned, 8> Added;
+ SmallSet<unsigned, 32> Added;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
if (Added.insert(Reg)) {
@@ -223,14 +224,13 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
bool Changed = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock &MBB = *I;
-
MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
- assert(!MII->isInsideBundle() &&
- "First instr cannot be inside bundle before finalization!");
-
MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
if (MII == MIE)
continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
for (++MII; MII != MIE; ) {
if (!MII->isInsideBundle())
++MII;
@@ -248,10 +248,10 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
// MachineOperand iterator
//===----------------------------------------------------------------------===//
-MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::VirtRegInfo
MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
- RegInfo RI = { false, false, false };
+ VirtRegInfo RI = { false, false, false };
for(; isValid(); ++*this) {
MachineOperand &MO = deref();
if (!MO.isReg() || MO.getReg() != Reg)
@@ -276,3 +276,55 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
}
return RI;
}
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false};
+
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "analyzePhysReg not given a physical register!");
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ PRI.Clobbers = true; // Regmask clobbers Reg.
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned MOReg = MO.getReg();
+ if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ continue;
+
+ bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+ bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
+
+ if (IsRegOrSuperReg && MO.readsReg()) {
+ // Reg or a super-reg is read, and perhaps killed also.
+ PRI.Reads = true;
+ PRI.Kills = MO.isKill();
+ }
+
+ if (IsRegOrOverlapping && MO.readsReg()) {
+      PRI.ReadsOverlap = true; // Reg or an overlapping register is read.
+ }
+
+ if (!MO.isDef())
+ continue;
+
+ if (IsRegOrSuperReg) {
+ PRI.Defines = true; // Reg or a super-register is defined.
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ if (IsRegOrOverlapping)
+ PRI.Clobbers = true; // Reg or an overlapping reg is defined.
+ }
+
+ if (AllDefsDead && PRI.Defines)
+ PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+
+ return PRI;
+}
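
A hedged usage sketch for the new query (the MIOperands iterator comes from MachineInstrBundle.h; ARM::CPSR is just an example register):

MIOperands Ops(MI);
MachineOperandIteratorBase::PhysRegInfo Info = Ops.analyzePhysReg(ARM::CPSR, TRI);
if (Info.Clobbers && !Info.Reads) {
  // MI redefines CPSR without reading it first, so an earlier definition's
  // value does not survive past MI.
}
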
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 169443e..760cf8a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,10 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -29,17 +33,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -780,7 +780,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
unsigned Reg, unsigned OpIdx,
unsigned &RCId, unsigned &RCCost) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
+ MVT VT = *RC->vt_begin();
if (VT == MVT::Untyped) {
RCId = RC->getID();
RCCost = 1;
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 05d2f2a..4e2cfdc 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -74,7 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineLoop::dump() const {
print(dbgs());
}
diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp
deleted file mode 100644
index 17fe67f..0000000
--- a/lib/CodeGen/MachineLoopRanges.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the implementation of the MachineLoopRanges analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineLoopRanges.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-
-using namespace llvm;
-
-char MachineLoopRanges::ID = 0;
-INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
- "Machine Loop Ranges", true, true)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
- "Machine Loop Ranges", true, true)
-
-char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
-
-void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<SlotIndexes>();
- AU.addRequiredTransitive<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
-bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
- releaseMemory();
- Indexes = &getAnalysis<SlotIndexes>();
- return false;
-}
-
-void MachineLoopRanges::releaseMemory() {
- DeleteContainerSeconds(Cache);
- Cache.clear();
-}
-
-MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
- MachineLoopRange *&Range = Cache[Loop];
- if (!Range)
- Range = new MachineLoopRange(Loop, Allocator, *Indexes);
- return Range;
-}
-
-/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
-MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
- MachineLoopRange::Allocator &alloc,
- SlotIndexes &Indexes)
- : Loop(loop), Intervals(alloc), Area(0) {
- // Compute loop coverage.
- for (MachineLoop::block_iterator I = Loop->block_begin(),
- E = Loop->block_end(); I != E; ++I) {
- const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
- Intervals.insert(Range.first, Range.second, 1u);
- Area += Range.first.distance(Range.second);
- }
-}
-
-/// overlaps - Return true if this loop overlaps the given range of machine
-/// instructions.
-bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
- Map::const_iterator I = Intervals.find(Start);
- return I.valid() && Stop > I.start();
-}
-
-unsigned MachineLoopRange::getNumber() const {
- return Loop->getHeader()->getNumber();
-}
-
-/// byNumber - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by number.
-int MachineLoopRange::byNumber(const void *pa, const void *pb) {
- const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
- const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
- unsigned na = a->getNumber();
- unsigned nb = b->getNumber();
- if (na < nb)
- return -1;
- if (na > nb)
- return 1;
- return 0;
-}
-
-/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by:
-/// 1. Descending area.
-/// 2. Ascending number.
-int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
- const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
- const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
- if (a->getArea() != b->getArea())
- return a->getArea() > b->getArea() ? -1 : 1;
- return byNumber(pa, pb);
-}
-
-void MachineLoopRange::print(raw_ostream &OS) const {
- OS << "Loop#" << getNumber() << " =";
- for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
- OS << " [" << I.start() << ';' << I.stop() << ')';
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
- MLR.print(OS);
- return OS;
-}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index ea98b23..0ea9ae0 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -8,24 +8,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfo.h"
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
using namespace llvm::dwarf;
-// Handle the Pass registration stuff necessary to use TargetData's.
+// Handle the Pass registration stuff necessary to use DataLayout's.
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
"Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
@@ -254,15 +253,8 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
const MCRegisterInfo &MRI,
const MCObjectFileInfo *MOFI)
- : ImmutablePass(ID), Context(MAI, MRI, MOFI),
- ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
- CallsUnwindInit(0), DbgInfoAvailable(false),
- UsesVAFloatArgument(false) {
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
- // Always emit some info, by default "no personality" info.
- Personalities.push_back(NULL);
- AddrLabelSymbols = 0;
- TheModule = 0;
}
MachineModuleInfo::MachineModuleInfo()
@@ -274,26 +266,36 @@ MachineModuleInfo::MachineModuleInfo()
}
MachineModuleInfo::~MachineModuleInfo() {
- delete ObjFileMMI;
+}
- // FIXME: Why isn't doFinalization being called??
- //assert(AddrLabelSymbols == 0 && "doFinalization not called");
- delete AddrLabelSymbols;
+bool MachineModuleInfo::doInitialization(Module &M) {
+
+ ObjFileMMI = 0;
+ CompactUnwindEncoding = 0;
+ CurCallSite = 0;
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ DbgInfoAvailable = UsesVAFloatArgument = false;
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
AddrLabelSymbols = 0;
-}
+ TheModule = 0;
-/// doInitialization - Initialize the state for a new module.
-///
-bool MachineModuleInfo::doInitialization() {
- assert(AddrLabelSymbols == 0 && "Improperly initialized");
return false;
}
-/// doFinalization - Tear down the state after completion of a module.
-///
-bool MachineModuleInfo::doFinalization() {
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+ Personalities.clear();
+
delete AddrLabelSymbols;
AddrLabelSymbols = 0;
+
+ Context.reset();
+
+ delete ObjFileMMI;
+ ObjFileMMI = 0;
+
return false;
}
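
The net effect of moving the state out of the constructor is that one immutable pass object can now serve several modules in sequence. A hedged sketch of the resulting lifecycle (M1/M2 are placeholder modules):

MachineModuleInfo MMI(MAI, MRI, MOFI); // constructor no longer seeds per-module state
MMI.doInitialization(M1);              // personalities, labels, counters reset here
// ... run codegen over M1 ...
MMI.doFinalization(M1);                // frees ObjFileMMI, resets the MCContext
MMI.doInitialization(M2);              // the same pass object can serve another module
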
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 5ab56c0..a1c7e9f 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -21,8 +21,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Out of line virtual method.
-void MachineModuleInfoMachO::Anchor() {}
-void MachineModuleInfoELF::Anchor() {}
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 0000000..c3f6e92
--- /dev/null
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp - Machine Post Dominator Calculation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+// Declare initializeMachinePostDominatorTreePass.
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+  DT = new DominatorTreeBase<MachineBasicBlock>(true); // true indicates a
+                                                       // postdominator tree
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+ DT->print(OS);
+}
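
A hedged sketch of a downstream consumer (MyMachinePass is hypothetical; the dominates() query is assumed from the accompanying MachinePostDominators.h header):

void MyMachinePass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachinePostDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool MyMachinePass::runOnMachineFunction(MachineFunction &MF) {
  MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
  MachineBasicBlock *Entry = &MF.front();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
    if (PDT.dominates(&*I, Entry))
      dbgs() << "BB#" << I->getNumber() << " post-dominates the entry block\n";
  return false; // analysis only; nothing modified
}
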
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f..21877e5 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
: TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedPhysRegs.resize(TRI.getNumRegs());
+ UsedRegUnits.resize(TRI.getNumRegUnits());
UsedPhysRegMask.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -30,12 +30,6 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
}
MachineRegisterInfo::~MachineRegisterInfo() {
-#ifndef NDEBUG
- clearVirtRegs();
- for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
- assert(!PhysRegUseDefLists[i] &&
- "PhysRegUseDefLists has entries after all instructions are deleted");
-#endif
delete [] PhysRegUseDefLists;
}
@@ -180,6 +174,55 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
MO->Contents.Reg.Next = 0;
}
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
+
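
The splice above is easier to follow on a self-contained model of the use-def chain: singly linked forward (Next, null-terminated) and circularly linked backward (Prev, with the head's Prev pointing at the last node), so one node can replace another in O(1) given only the list head.

struct Node { Node *Prev; Node *Next; };

static void replaceNode(Node *&Head, Node *Src, Node *Dst) {
  *Dst = *Src;                 // Dst copies Src's links
  if (Src == Head)
    Head = Dst;                // Src was first: repoint the head
  else
    Src->Prev->Next = Dst;     // otherwise splice in after the predecessor
  // Fix the back-link of the following node; a null Next means "end of list",
  // in which case it is Head->Prev that points back at this node. This also
  // covers the 1-element list where Src pointed at itself.
  (Dst->Next ? Dst->Next : Head)->Prev = Dst;
}
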
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -306,22 +349,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- // Check if any overlapping register is modified.
- for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (!def_empty(*AI))
- return false;
-
- // Check if any overlapping register is allocatable so it may be used later.
- if (AllocatableRegs.empty())
- AllocatableRegs = TRI->getAllocatableSet(MF);
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (AllocatableRegs.test(*AI))
+ if (!def_empty(*AI) || isAllocatable(*AI))
return false;
return true;
}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 076547a..bb6aad7 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
@@ -109,7 +109,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
/// a value of the given register class at the start of the specified basic
/// block. It returns the virtual register defined by the instruction.
static
-MachineInstr *InsertNewDef(unsigned Opcode,
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
MachineBasicBlock *BB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
MachineRegisterInfo *MRI,
@@ -183,13 +183,12 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// Otherwise, we do need a PHI: insert one now.
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
- MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
- Loc, VRC, MRI, TII);
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
// Fill in all the predecessors of the PHI.
- MachineInstrBuilder MIB(InsertedPHI);
for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
@@ -316,8 +315,7 @@ public:
/// the specified predecessor block.
static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
MachineBasicBlock *Pred) {
- PHI->addOperand(MachineOperand::CreateReg(Val, false));
- PHI->addOperand(MachineOperand::CreateMBB(Pred));
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
}
/// InstrIsPHI - Check if an instruction is a PHI.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 4704dae..117b2bd 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -14,31 +14,29 @@
#define DEBUG_TYPE "misched"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
-
#include <queue>
using namespace llvm;
-static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
- cl::desc("Force top-down list scheduling"));
-static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
- cl::desc("Force bottom-up list scheduling"));
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+}
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -50,6 +48,23 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+// Threshold to very roughly model an out-of-order processor's instruction
+// buffers. If the actual value of this threshold matters much in practice, then
+// it can be specified by the machine model. For now, it's an experimental
+// tuning knob to determine when and if it matters.
+static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
+ cl::desc("Allow expected latency to exceed the critical path by N cycles "
+ "before attempting to balance ILP"),
+ cl::init(10U));
+
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable load clustering."), cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
@@ -221,7 +236,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// The Scheduler may insert instructions during either schedule() or
// exitRegion(), even for empty regions. So the local iterators 'I' and
// 'RegionEnd' are invalid across these calls.
- unsigned RemainingCount = MBB->size();
+ unsigned RemainingInstrs = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
@@ -230,19 +245,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
|| TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
--RegionEnd;
// Count the boundary instruction.
- --RemainingCount;
+ --RemainingInstrs;
}
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
+ for(;I != MBB->begin(); --I, --RemainingInstrs) {
if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
break;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
@@ -256,7 +271,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
<< ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " Remaining: " << RemainingCount << "\n");
+ dbgs() << " Remaining: " << RemainingInstrs << "\n");
// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
@@ -269,7 +284,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// scheduler for the top of its scheduled region.
RegionEnd = Scheduler->begin();
}
- assert(RemainingCount == 0 && "Instruction count mismatch!");
+ assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler->finishBlock();
}
Scheduler->finalizeSchedule();
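
To make the bookkeeping concrete, here is a trace of the region loop above on an assumed block (instruction names invented):

// Block: [I0 I1 I2 I3 CALL I5 I6], where CALL is the only scheduling
// boundary. RemainingInstrs starts at 7.
//   Pass 1: RegionEnd = end(). I walks back from end() and stops when
//           prior(I) is CALL, so the region is [I5, end) and the two steps
//           drop RemainingInstrs to 5.
//   Pass 2: RegionEnd = Scheduler->begin(), and prior(RegionEnd) is CALL, a
//           boundary, so RegionEnd steps onto it and RemainingInstrs becomes
//           4. I then walks back to begin(), yielding region [I0, CALL) and
//           RemainingInstrs == 0, which satisfies the final assert.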
@@ -281,156 +296,32 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-//===----------------------------------------------------------------------===//
-// MachineSchedStrategy - Interface to a machine scheduling algorithm.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ScheduleDAGMI;
-
-/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
-/// scheduling algorithm.
-///
-/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
-/// in ScheduleDAGInstrs.h
-class MachineSchedStrategy {
-public:
- virtual ~MachineSchedStrategy() {}
-
- /// Initialize the strategy after building the DAG for a new region.
- virtual void initialize(ScheduleDAGMI *DAG) = 0;
-
- /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
- /// schedule the node at the top of the unscheduled region. Otherwise it will
- /// be scheduled at the bottom.
- virtual SUnit *pickNode(bool &IsTopNode) = 0;
-
- /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node.
- virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
-
- /// When all predecessor dependencies have been resolved, free this node for
- /// top-down scheduling.
- virtual void releaseTopNode(SUnit *SU) = 0;
- /// When all successor dependencies have been resolved, free this node for
- /// bottom-up scheduling.
- virtual void releaseBottomNode(SUnit *SU) = 0;
-};
-} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+ dbgs() << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+}
+#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//
-namespace {
-/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
-/// machine instructions while updating LiveIntervals.
-class ScheduleDAGMI : public ScheduleDAGInstrs {
- AliasAnalysis *AA;
- RegisterClassInfo *RegClassInfo;
- MachineSchedStrategy *SchedImpl;
-
- MachineBasicBlock::iterator LiveRegionEnd;
-
- /// Register pressure in this region computed by buildSchedGraph.
- IntervalPressure RegPressure;
- RegPressureTracker RPTracker;
-
- /// List of pressure sets that exceed the target's pressure limit before
- /// scheduling, listed in increasing set ID order. Each pressure set is paired
- /// with its max pressure in the currently scheduled regions.
- std::vector<PressureElement> RegionCriticalPSets;
-
- /// The top of the unscheduled zone.
- MachineBasicBlock::iterator CurrentTop;
- IntervalPressure TopPressure;
- RegPressureTracker TopRPTracker;
-
- /// The bottom of the unscheduled zone.
- MachineBasicBlock::iterator CurrentBottom;
- IntervalPressure BotPressure;
- RegPressureTracker BotRPTracker;
-
-#ifndef NDEBUG
- /// The number of instructions scheduled so far. Used to cut off the
- /// scheduler at the point determined by misched-cutoff.
- unsigned NumInstrsScheduled;
-#endif
-public:
- ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
- ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
- AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
- RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
- CurrentBottom(), BotRPTracker(BotPressure) {
-#ifndef NDEBUG
- NumInstrsScheduled = 0;
-#endif
- }
-
- ~ScheduleDAGMI() {
- delete SchedImpl;
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle; it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
}
-
- MachineBasicBlock::iterator top() const { return CurrentTop; }
- MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
-
- /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
- /// region. This covers all instructions in a block, while schedule() may only
- /// cover a subset.
- void enterRegion(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount);
-
- /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
- /// reorderable instructions.
- void schedule();
-
- /// Get current register pressure for the top scheduled instructions.
- const IntervalPressure &getTopPressure() const { return TopPressure; }
- const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
-
- /// Get current register pressure for the bottom scheduled instructions.
- const IntervalPressure &getBotPressure() const { return BotPressure; }
- const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
-
- /// Get register pressure for the entire scheduling region before scheduling.
- const IntervalPressure &getRegPressure() const { return RegPressure; }
-
- const std::vector<PressureElement> &getRegionCriticalPSets() const {
- return RegionCriticalPSets;
- }
-
- /// getIssueWidth - Return the max instructions per scheduling group.
- unsigned getIssueWidth() const {
- return (InstrItins && InstrItins->SchedModel)
- ? InstrItins->SchedModel->IssueWidth : 1;
- }
-
- /// getNumMicroOps - Return the number of issue slots required for this MI.
- unsigned getNumMicroOps(MachineInstr *MI) const {
- if (!InstrItins) return 1;
- int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
- return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
- }
-
-protected:
- void initRegPressure();
- void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
-
- void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
- bool checkSchedLimit();
-
- void releaseRoots();
-
- void releaseSucc(SUnit *SU, SDep *SuccEdge);
- void releaseSuccessors(SUnit *SU);
- void releasePred(SUnit *SU, SDep *PredEdge);
- void releasePredecessors(SUnit *SU);
-
- void placeDebugValues();
-};
-} // namespace
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
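
A sketch of how a DAG mutation is expected to use this contract (hypothetical helper; SUa and SUb are assumed to be nodes of the same region):

// Try to keep SUb glued behind SUa. addEdge refuses edges that would close a
// cycle through the topological order, so the attempt is safe to make
// speculatively; on success the weak Cluster edge only biases the heuristics.
static void tryClusterPair(llvm::ScheduleDAGMI *DAG,
                           llvm::SUnit *SUa, llvm::SUnit *SUb) {
  if (DAG->addEdge(SUb, llvm::SDep(SUa, llvm::SDep::Cluster))) {
    // Edge accepted: bottom-up scheduling now strongly prefers picking SUb
    // immediately after SUa (see the Cluster heuristic in tryCandidate).
  }
}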
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
@@ -439,6 +330,12 @@ protected:
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -467,6 +364,12 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -498,7 +401,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
BB->splice(InsertPos, BB, MI);
// Update LiveIntervals
- LIS->handleMove(MI);
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -565,6 +468,9 @@ void ScheduleDAGMI::initRegPressure() {
std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = TRI->getRegPressureSetLimit(i);
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << "Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
if (RegionPressure[i] > Limit)
RegionCriticalPSets.push_back(PressureElement(i, 0));
}
@@ -587,30 +493,54 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
}
}
-// Release all DAG roots for scheduling.
-void ScheduleDAGMI::releaseRoots() {
- SmallVector<SUnit*, 16> BotRoots;
-
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- // A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
- // A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- BotRoots.push_back(&(*I));
- }
- // Release bottom roots in reverse order so the higher priority nodes appear
- // first. This is more natural and slightly more efficient.
- for (SmallVectorImpl<SUnit*>::const_reverse_iterator
- I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
- SchedImpl->releaseBottomNode(*I);
-}
-
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
void ScheduleDAGMI::schedule() {
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ postprocessDAG();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ initQueues();
+
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
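
As the comment above says, most new schedulers only need a MachineSchedStrategy. A minimal sketch against the interface visible in this patch (initialize, pickNode, schedNode, releaseTopNode, releaseBottomNode, registerRoots); it assumes the strategy class now lives in a shared header after being removed from this file:

#include "llvm/CodeGen/MachineScheduler.h"
#include <vector>

namespace {
// Hypothetical strategy: schedule purely bottom-up, always picking the most
// recently released ready node.
class TrivialBottomUpStrategy : public llvm::MachineSchedStrategy {
  std::vector<llvm::SUnit*> Ready;
public:
  virtual void initialize(llvm::ScheduleDAGMI *DAG) { Ready.clear(); }
  virtual llvm::SUnit *pickNode(bool &IsTopNode) {
    IsTopNode = false;               // always extend the bottom zone
    if (Ready.empty()) return NULL;  // NULL ends the scheduling loop
    llvm::SUnit *SU = Ready.back();
    Ready.pop_back();
    return SU;
  }
  virtual void schedNode(llvm::SUnit *SU, bool IsTopNode) {}
  virtual void releaseTopNode(llvm::SUnit *SU) {}
  virtual void releaseBottomNode(llvm::SUnit *SU) { Ready.push_back(SU); }
  virtual void registerRoots() {}
};
} // namespace
// Usage sketch: new ScheduleDAGMI(C, new TrivialBottomUpStrategy()) from a
// scheduler registry callback; ScheduleDAGMI deletes the strategy itself.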
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
@@ -620,79 +550,118 @@ void ScheduleDAGMI::schedule() {
// Build the DAG, and compute current register pressure.
buildSchedGraph(AA, &RPTracker);
+ if (ViewMISchedDAGs) viewGraph();
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
+}
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+ for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+ Mutations[i]->apply(this);
+ }
+}
- if (ViewMISchedDAGs) viewGraph();
+// Release all DAG roots for scheduling.
+//
+// Nodes with unreleased weak edges can still be roots.
+void ScheduleDAGMI::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (!I->NumPredsLeft && SU != &EntrySU)
+ SchedImpl->releaseTopNode(SU);
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (!I->NumSuccsLeft && SU != &ExitSU)
+ BotRoots.push_back(SU);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues() {
+ NextClusterSucc = NULL;
+ NextClusterPred = NULL;
+
+ // Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
- // Release edges from the special Entry node or to the special Exit node.
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ releaseRoots();
+
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
- // Release all DAG roots for scheduling.
- releaseRoots();
+ SchedImpl->registerRoots();
+ // Advance past initial DebugValues.
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
- CurrentBottom = RegionEnd;
- bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
- if (!checkSchedLimit())
- break;
-
- // Move the instruction to its new location in the instruction stream.
- MachineInstr *MI = SU->getInstr();
+ TopRPTracker.setPos(CurrentTop);
- if (IsTopNode) {
- assert(SU->isTopReady() && "node still has unscheduled dependencies");
- if (&*CurrentTop == MI)
- CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
- else {
- moveInstruction(MI, CurrentTop);
- TopRPTracker.setPos(MI);
- }
+ CurrentBottom = RegionEnd;
+}
- // Update top scheduled pressure.
- TopRPTracker.advance();
- assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
- updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+/// Move an instruction and update register pressure.
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
- // Release dependent instructions for scheduling.
- releaseSuccessors(SU);
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
}
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
else {
- assert(SU->isBottomReady() && "node still has unscheduled dependencies");
- MachineBasicBlock::iterator priorII =
- priorNonDebug(CurrentBottom, CurrentTop);
- if (&*priorII == MI)
- CurrentBottom = priorII;
- else {
- if (&*CurrentTop == MI) {
- CurrentTop = nextIfDebug(++CurrentTop, priorII);
- TopRPTracker.setPos(CurrentTop);
- }
- moveInstruction(MI, CurrentBottom);
- CurrentBottom = MI;
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
}
- // Update bottom scheduled pressure.
- BotRPTracker.recede();
- assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
- updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
-
- // Release dependent instructions for scheduling.
- releasePredecessors(SU);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
}
- SU->isScheduled = true;
- SchedImpl->schedNode(SU, IsTopNode);
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
}
- assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
- placeDebugValues();
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
}
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
@@ -708,6 +677,8 @@ void ScheduleDAGMI::placeDebugValues() {
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
MachineInstr *DbgValue = P.first;
MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
BB->splice(++OrigPrevMI, BB, DbgValue);
if (OrigPrevMI == llvm::prior(RegionEnd))
RegionEnd = DbgValue;
@@ -716,93 +687,310 @@ void ScheduleDAGMI::placeDebugValues() {
FirstDbgValue = NULL;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+ if (SUnit *SU = getSUnit(&(*MI)))
+ SU->dump(this);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
//===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//
namespace {
-/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
-/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
-/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
-class ReadyQueue {
- unsigned ID;
- std::string Name;
- std::vector<SUnit*> Queue;
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+ struct LoadInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ unsigned Offset;
+ LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+ };
+ static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS);
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
public:
- ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+ LoadClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : TII(tii), TRI(tri) {}
- unsigned getID() const { return ID; }
+ virtual void apply(ScheduleDAGMI *DAG);
+protected:
+ void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+ const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS) {
+ if (LHS.BaseReg != RHS.BaseReg)
+ return LHS.BaseReg < RHS.BaseReg;
+ return LHS.Offset < RHS.Offset;
+}
- StringRef getName() const { return Name; }
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+ ScheduleDAGMI *DAG) {
+ SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
+ for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+ SUnit *SU = Loads[Idx];
+ unsigned BaseReg;
+ unsigned Offset;
+ if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ }
+ if (LoadRecords.size() < 2)
+ return;
+ std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+ if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
- // SU is in this queue if it's NodeQueueID is a superset of this ID.
- bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+ SUnit *SUa = LoadRecords[Idx].SU;
+ SUnit *SUb = LoadRecords[Idx+1].SU;
+ if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+ DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ }
+ else
+ ClusterLength = 1;
+ }
+}
- bool empty() const { return Queue.empty(); }
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent loads.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->mayLoad())
+ continue;
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
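
A worked trace of apply() plus clusterNeighboringLoads() on assumed input (SU numbers, registers, and offsets invented):

// Four loads hang off the same store chain and survive getLdStBaseRegImmOfs:
//   SU(3): load [r5 + 8]   SU(7): load [r5 + 4]
//   SU(2): load [r5 + 12]  SU(9): load [r6 + 0]
// Sorting by (BaseReg, Offset) gives: SU(7), SU(3), SU(2), SU(9).
// Adjacent same-base pairs are offered to shouldClusterLoads with the running
// ClusterLength, so SU(7)-SU(3)-SU(2) may become one chain of weak Cluster
// edges, while the base change at SU(9) resets ClusterLength to 1.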
- unsigned size() const { return Queue.size(); }
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
- typedef std::vector<SUnit*>::iterator iterator;
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo *TII;
+public:
+ MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
- iterator begin() { return Queue.begin(); }
+ virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+ // For now, assume targets can only fuse with the branch.
+ MachineInstr *Branch = DAG->ExitSU.getInstr();
+ if (!Branch)
+ return;
- iterator end() { return Queue.end(); }
+ for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+ SUnit *SU = &DAG->SUnits[--Idx];
+ if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ continue;
- iterator find(SUnit *SU) {
- return std::find(Queue.begin(), Queue.end(), SU);
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ break;
}
+}
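
How these mutations are expected to be attached (a sketch: the registration site is outside this hunk, so the exact hook is assumed to be an addMutation() that appends to the Mutations list applied by postprocessDAG()):

// Hypothetical wiring inside the scheduler-creation callback, gated on the
// experimental flags defined near the top of this file.
static void addExperimentalMutations(llvm::ScheduleDAGMI *DAG,
                                     const llvm::TargetInstrInfo *TII,
                                     const llvm::TargetRegisterInfo *TRI) {
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(TII, TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(TII));
}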
- void push(SUnit *SU) {
- Queue.push_back(SU);
- SU->NodeQueueId |= ID;
- }
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
- void remove(iterator I) {
- (*I)->NodeQueueId &= ~ID;
- *I = Queue.back();
- Queue.pop_back();
- }
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+public:
+ /// Represent the type of SchedCandidate found within a single queue.
+ /// pickNodeBidirectional depends on these listed by decreasing priority.
+ enum CandReason {
+ NoCand, SingleExcess, SingleCritical, Cluster,
+ ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+ TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+ NodeOrder};
#ifndef NDEBUG
- void dump() {
- dbgs() << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
- dbgs() << "\n";
- }
+ static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif
-};
-/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class ConvergingScheduler : public MachineSchedStrategy {
+ /// Policy for scheduling the next instruction in the candidate's zone.
+ struct CandPolicy {
+ bool ReduceLatency;
+ unsigned ReduceResIdx;
+ unsigned DemandResIdx;
+
+ CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+ };
+
+ /// Status of an instruction's critical resource consumption.
+ struct SchedResourceDelta {
+ // Count critical resources in the scheduled region required by SU.
+ unsigned CritResources;
+
+ // Count critical resources from another region consumed by SU.
+ unsigned DemandedResources;
+
+ SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
+
+ bool operator==(const SchedResourceDelta &RHS) const {
+ return CritResources == RHS.CritResources
+ && DemandedResources == RHS.DemandedResources;
+ }
+ bool operator!=(const SchedResourceDelta &RHS) const {
+ return !operator==(RHS);
+ }
+ };
/// Store the state used by ConvergingScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
+ CandPolicy Policy;
+
// The best SUnit candidate.
SUnit *SU;
+ // The reason for this candidate.
+ CandReason Reason;
+
// Register pressure values for the best candidate.
RegPressureDelta RPDelta;
- SchedCandidate(): SU(NULL) {}
+ // Critical resource consumption of the best candidate.
+ SchedResourceDelta ResDelta;
+
+ SchedCandidate(const CandPolicy &policy)
+ : Policy(policy), SU(NULL), Reason(NoCand) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ RPDelta = Best.RPDelta;
+ ResDelta = Best.ResDelta;
+ }
+
+ void initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel);
+ };
+
+ /// Summarize the unscheduled region.
+ struct SchedRemainder {
+ // Critical path through the DAG in expected latency.
+ unsigned CriticalPath;
+
+ // Unscheduled resources
+ SmallVector<unsigned, 16> RemainingCounts;
+ // Critical resource for the unscheduled zone.
+ unsigned CritResIdx;
+ // Number of micro-ops left to schedule.
+ unsigned RemainingMicroOps;
+
+ void reset() {
+ CriticalPath = 0;
+ RemainingCounts.clear();
+ CritResIdx = 0;
+ RemainingMicroOps = 0;
+ }
+
+ SchedRemainder() { reset(); }
+
+ void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
+
+ unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ return std::max(
+ RemainingMicroOps * SchedModel->getMicroOpFactor(),
+ RemainingCounts[CritResIdx]);
+ }
};
- /// Represent the type of SchedCandidate found within a single queue.
- enum CandResult {
- NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
/// Each Scheduling boundary is associated with ready queues. It tracks the
- /// current cycle in whichever direction at has moved, and maintains the state
+ /// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
struct SchedBoundary {
ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ SchedRemainder *Rem;
ReadyQueue Available;
ReadyQueue Pending;
bool CheckPending;
+ // For heuristics, keep a list of the nodes that immediately depend on the
+ // most recently scheduled node.
+ SmallPtrSet<const SUnit*, 8> NextSUs;
+
ScheduleHazardRecognizer *HazardRec;
unsigned CurrCycle;
@@ -811,29 +999,84 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
+ // The expected latency of the critical path in this scheduled zone.
+ unsigned ExpectedLatency;
+
+ // Resources used in the scheduled zone beyond this boundary.
+ SmallVector<unsigned, 16> ResourceCounts;
+
+ // Cache the critical resources ID in this scheduled zone.
+ unsigned CritResIdx;
+
+ // Is the scheduled region resource limited vs. latency limited.
+ bool IsResourceLimited;
+
+ unsigned ExpectedCount;
+
+#ifndef NDEBUG
// Remember the greatest min operand latency.
unsigned MaxMinLatency;
+#endif
+
+ void reset() {
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ NextSUs.clear();
+ HazardRec = 0;
+ CurrCycle = 0;
+ IssueCount = 0;
+ MinReadyCycle = UINT_MAX;
+ ExpectedLatency = 0;
+ ResourceCounts.resize(1);
+ assert(!ResourceCounts[0] && "nonzero count for bad resource");
+ CritResIdx = 0;
+ IsResourceLimited = false;
+ ExpectedCount = 0;
+#ifndef NDEBUG
+ MaxMinLatency = 0;
+#endif
+ // Reserve a zero-count for invalid CritResIdx.
+ ResourceCounts.resize(1);
+ }
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
- DAG(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
- CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
- MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+ DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+ reset();
+ }
~SchedBoundary() { delete HazardRec; }
+ void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+ SchedRemainder *rem);
+
bool isTop() const {
return Available.getID() == ConvergingScheduler::TopQID;
}
+ unsigned getUnscheduledLatency(SUnit *SU) const {
+ if (isTop())
+ return SU->getHeight();
+ return SU->getDepth() + SU->Latency;
+ }
+
+ unsigned getCriticalCount() const {
+ return ResourceCounts[CritResIdx];
+ }
+
bool checkHazard(SUnit *SU);
+ void setLatencyPolicy(CandPolicy &Policy);
+
void releaseNode(SUnit *SU, unsigned ReadyCycle);
void bumpCycle();
+ void countResource(unsigned PIdx, unsigned Cycles);
+
void bumpNode(SUnit *SU);
void releasePending();
@@ -843,10 +1086,13 @@ class ConvergingScheduler : public MachineSchedStrategy {
SUnit *pickOnlyChoice();
};
+private:
ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
// State of the top and bottom scheduled instruction boundaries.
+ SchedRemainder Rem;
SchedBoundary Top;
SchedBoundary Bot;
@@ -859,7 +1105,7 @@ public:
};
ConvergingScheduler():
- DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+ DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
virtual void initialize(ScheduleDAGMI *dag);
@@ -871,28 +1117,87 @@ public:
virtual void releaseBottomNode(SUnit *SU);
+ virtual void registerRoots();
+
protected:
- SUnit *pickNodeBidrectional(bool &IsTopNode);
+ void balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand);
+
+ void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand);
+
+ void tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker);
+
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ void pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
- CandResult pickNodeFromQueue(ReadyQueue &Q,
- const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate);
#ifndef NDEBUG
- void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
- PressureElement P = PressureElement());
+ void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace
+void ConvergingScheduler::SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (std::vector<SUnit>::iterator
+ I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+ RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ }
+ }
+ for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ }
+ }
+}
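
The factors used above come from TargetSchedModel, which (assuming the usual normalization over a common multiple of IssueWidth and per-resource unit counts) lets micro-ops and heterogeneous resources be compared directly. A worked example with invented numbers:

// IssueWidth = 4, resource A has 2 units, resource B has 3 units.
// Common multiple: LCM(4, 2, 3) = 12, so
//   LatencyFactor = 12, MicroOpFactor = 12/4 = 3,
//   Factor(A) = 12/2 = 6, Factor(B) = 12/3 = 4.
// One instruction with 2 micro-ops, 1 cycle on A, 2 cycles on B contributes:
//   RemainingMicroOps += 2      (worth 2*3 = 6 normalized units)
//   RemainingCounts[A] += 6*1 = 6
//   RemainingCounts[B] += 4*2 = 8
// CritResIdx only switches to B once the accumulated gap over the current
// critical resource reaches a full LatencyFactor (12).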
+
+void ConvergingScheduler::SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel())
+ ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+}
+
void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
+ SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
- Top.DAG = dag;
- Bot.DAG = dag;
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
- // Initialize the HazardRecognizers.
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
- const InstrItineraryData *Itin = TM.getInstrItineraryData();
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -904,7 +1209,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
- for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
unsigned MinLatency = I->getMinLatency();
@@ -925,6 +1230,8 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
+ if (I->isWeak())
+ continue;
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
@@ -936,6 +1243,17 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
Bot.releaseNode(SU, SU->BotReadyCycle);
}
+void ConvergingScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -953,14 +1271,42 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
- if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
-
+ }
return false;
}
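
A numeric illustration of the width check above (all values assumed):

// IssueWidth = 4 and IssueCount = 3 micro-ops already issued this cycle.
// A candidate SU needing 2 micro-ops gives 3 + 2 > 4 with IssueCount > 0,
// so checkHazard reports a hazard: the SU is deferred to Pending until
// bumpCycle() opens the next instruction group.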
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+ // FIXME: compile time. In all, we visit four queues here when we should only
+ // need to visit the one that was last popped if we cache the result.
+ unsigned RemLatency = 0;
+ for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
+ && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
+ Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+ }
+}
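
A numeric illustration of the trigger (all cycle counts assumed):

// ILPWindow = 10 (the default above), Rem->CriticalPath = 40.
//   ExpectedLatency = 25  (latency already committed in this zone)
//   RemLatency      = 30  (deepest unscheduled node in Available/Pending)
// 25 + 30 = 55 >= 40 + 10, so if 30 also exceeds the normalized remaining
// resource count, ReduceLatency is set and tryCandidate starts preferring
// nodes that shorten long dependence chains.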
+
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
+
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -970,15 +1316,22 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
Pending.push(SU);
else
Available.push(SU);
+
+ // Record this node as an immediate dependent of the scheduled node.
+ NextSUs.insert(SU);
}
/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
- unsigned Width = DAG->getIssueWidth();
+ unsigned Width = SchedModel->getIssueWidth();
IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+ unsigned NextCycle = CurrCycle + 1;
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
- unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+ if (MinReadyCycle > NextCycle) {
+ IssueCount = 0;
+ NextCycle = MinReadyCycle;
+ }
if (!HazardRec->isEnabled()) {
// Bypass HazardRec virtual calls.
@@ -994,11 +1347,36 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
}
}
CheckPending = true;
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
- DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
<< CurrCycle << '\n');
}
+/// Add the given processor resource to this scheduled zone.
+void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
+ unsigned Cycles) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
+ << " +(" << Cycles << "x" << Factor
+ << ") / " << SchedModel->getLatencyFactor() << '\n');
+
+ unsigned Count = Factor * Cycles;
+ ResourceCounts[PIdx] += Count;
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Check if this resource exceeds the current critical resource by a full
+ // cycle. If so, it becomes the critical resource.
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getProcResource(PIdx)->Name << " x"
+ << ResourceCounts[PIdx] << '\n');
+ }
+}
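
The zone counts and the remainder form double-entry bookkeeping: every unit moves from one ledger to the other. A small trace with assumed numbers:

// Factor(A) = 6, so countResource(A, /*Cycles=*/2) moves 12 normalized units:
//   before: ResourceCounts[A] = 18, Rem->RemainingCounts[A] = 30
//   after:  ResourceCounts[A] = 30, Rem->RemainingCounts[A] = 18
// Applying a write-resource entry twice for the same SU would drive the
// remainder negative, which the assert catches first.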
+
/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
// Update the reservation table.
@@ -1010,11 +1388,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
}
HazardRec->EmitInstruction(SU);
}
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ countResource(PI->ProcResourceIdx, PI->Cycles);
+ }
+ }
+ if (isTop()) {
+ if (SU->getDepth() > ExpectedLatency)
+ ExpectedLatency = SU->getDepth();
+ }
+ else {
+ if (SU->getHeight() > ExpectedLatency)
+ ExpectedLatency = SU->getHeight();
+ }
+
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
// Check the instruction group dispatch limit.
// TODO: Check if this SU must end a dispatch group.
- IssueCount += DAG->getNumMicroOps(SU->getInstr());
- if (IssueCount >= DAG->getIssueWidth()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+ // checkHazard prevents scheduling multiple instructions per cycle that exceed
+ // issue width. However, we commonly reach the maximum. In this case,
+ // opportunistically bump the cycle to avoid uselessly checking everything in
+ // the readyQ. Furthermore, a single instruction may produce more than one
+ // cycle's worth of micro-ops.
+ if (IssueCount >= SchedModel->getIssueWidth()) {
+ DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();
}
}
@@ -1045,6 +1450,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
Pending.remove(Pending.begin()+i);
--i; --e;
}
+ DEBUG(if (!Pending.empty()) Pending.dump());
CheckPending = false;
}
@@ -1059,12 +1465,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
}
/// If this queue only has one ready candidate, return it. As a side effect,
-/// advance the cycle until at least one node is ready. If multiple instructions
-/// are ready, return NULL.
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
+ if (IssueCount > 0) {
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ }
for (unsigned i = 0; Available.empty(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
"permanent hazard"); (void)i;
@@ -1076,18 +1493,275 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
return NULL;
}
-#ifndef NDEBUG
-void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
- SUnit *SU, PressureElement P) {
- dbgs() << Label << " " << Q.getName() << " ";
- if (P.isValid())
- dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
- else
- dbgs() << " ";
- SU->dump(DAG);
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
+void ConvergingScheduler::balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+ if (!CriticalZone.IsResourceLimited)
+ return;
+ assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
+
+ SchedRemainder *Rem = CriticalZone.Rem;
+
+ // If the critical zone is overconsuming a resource relative to the
+ // remainder, try to reduce it.
+ unsigned RemainingCritCount =
+ Rem->RemainingCounts[CriticalZone.CritResIdx];
+ if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+ << '\n');
+ }
+ // If the other zone is underconsuming a resource relative to the full zone,
+ // try to increase it.
+ unsigned OppositeCount =
+ OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+ if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+ << '\n');
+ }
+}
+
+/// Determine if the scheduled zones exceed resource limits or the critical
+/// path, and set each candidate's latency and resource reduction policy
+/// accordingly.
+void ConvergingScheduler::checkResourceLimits(
+ ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand) {
+
+ // Set ReduceLatency to true if needed.
+ Bot.setLatencyPolicy(TopCand.Policy);
+ Top.setLatencyPolicy(BotCand.Policy);
+
+ // Handle resource-limited regions.
+ if (Top.IsResourceLimited && Bot.IsResourceLimited
+ && Top.CritResIdx == Bot.CritResIdx) {
+ // If the scheduled critical resource in both zones is no longer the
+ // critical remaining resource, attempt to reduce resource height both ways.
+ if (Top.CritResIdx != Rem.CritResIdx) {
+ TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+ BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+ DEBUG(dbgs() << "Reduce scheduled "
+ << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+ }
+ return;
+ }
+ // Handle latency-limited regions.
+ if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+ // If the total scheduled expected latency exceeds the region's critical
+ // path then reduce latency both ways.
+ //
+ // Just because a zone is not resource limited does not mean it is latency
+ // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
+ // to exceed expected latency.
+ if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+ && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+ TopCand.Policy.ReduceLatency = true;
+ BotCand.Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ << " + " << Bot.ExpectedLatency << '\n');
+ }
+ return;
+ }
+ // The critical resource is different in each zone, so request balancing.
+
+ // Compute the cost of each zone.
+ Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+ Top.ExpectedCount = std::max(
+ Top.getCriticalCount(),
+ Top.ExpectedCount * SchedModel->getLatencyFactor());
+ Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+ Bot.ExpectedCount = std::max(
+ Bot.getCriticalCount(),
+ Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+ balanceZones(Top, TopCand, Bot, BotCand);
+ balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(unsigned TryVal, unsigned CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static bool tryGreater(unsigned TryVal, unsigned CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
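
The helpers encode a three-way protocol that the tryCandidate cascade below relies on; a summary, keeping in mind that lower CandReason values rank higher:

// tryLess(TryVal, CandVal, TryCand, Cand, Reason):
//   TryVal <  CandVal : decisive, TryCand wins and records Reason.
//   TryVal >  CandVal : decisive, Cand stands; its recorded Reason is
//                       upgraded to this one if it ranked lower.
//   TryVal == CandVal : returns false; fall through to the next heuristic.
// tryGreater is the mirror image. A 'return' after a true result therefore
// stops the cascade, while ties let weaker tie-breakers run.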
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+
+ // Always initialize TryCand's RPDelta.
+ TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+ // Avoid exceeding the target's limit.
+ if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+ Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+ return;
+ if (Cand.Reason == SingleExcess)
+ Cand.Reason = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
+ Cand.RPDelta.CriticalMax.UnitIncrease,
+ TryCand, Cand, SingleCritical))
+ return;
+ if (Cand.Reason == SingleCritical)
+ Cand.Reason = MultiPressure;
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+ // Currently, weak edges are for clustering, so we hard-code that reason.
+ // However, deferring the current TryCand will not change Cand's reason.
+ CandReason OrigReason = Cand.Reason;
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Cluster)) {
+ Cand.Reason = OrigReason;
+ return;
+ }
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, TopDepthReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, TopPathReduce))
+ return;
+ }
+ else {
+ if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, BotHeightReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, BotPathReduce))
+ return;
+ }
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
+ Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
+ return;
+ if (Cand.Reason == SingleMax)
+ Cand.Reason = MultiPressure;
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // nice pressure avoidance strategy that also conserves the processor's
+ // register renaming resources and keeps the machine code readable.
+ if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
}
-#endif
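The tryLess/tryGreater helpers above implement a hierarchical tie-break: each heuristic either decides the comparison and records why, or falls through to the next one. A minimal standalone sketch of the same pattern, with hypothetical Candidate and Reason types rather than the LLVM ones:

#include <cassert>
#include <iostream>

// Hypothetical stand-ins for SchedCandidate/CandReason; lower enum values
// outrank higher ones, as in the scheduler's CandReason ordering.
enum Reason { NoCand, RegExcess, Latency, NodeOrder };

struct Candidate {
  int Excess;   // register pressure increase if scheduled
  int Depth;    // critical path depth
  int NodeNum;  // original instruction order
  Reason Why;
};

// Mirrors tryLess: a win records the deciding reason on TryCand; a loss
// weakens Cand's reason; a tie falls through to the next heuristic.
static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                    Candidate &Cand, Reason R) {
  if (TryVal < CandVal) { TryCand.Why = R; return true; }
  if (TryVal > CandVal) { if (Cand.Why > R) Cand.Why = R; return true; }
  return false;
}

// Each level either decides the comparison or defers to a lower level,
// instead of blending everything into one scalar cost.
static void tryCandidate(Candidate &Cand, Candidate &TryCand) {
  if (Cand.Why == NoCand) { TryCand.Why = NodeOrder; return; }
  if (tryLess(TryCand.Excess, Cand.Excess, TryCand, Cand, RegExcess))
    return;
  if (tryLess(TryCand.Depth, Cand.Depth, TryCand, Cand, Latency))
    return;
  if (TryCand.NodeNum < Cand.NodeNum)
    TryCand.Why = NodeOrder; // last resort: source order
}

int main() {
  Candidate Best = { 2, 5, 0, RegExcess };
  Candidate Try  = { 1, 9, 1, NoCand };
  tryCandidate(Best, Try);
  // Lower pressure decides before latency is even examined.
  assert(Try.Why == RegExcess);
  std::cout << "picked, reason = " << Try.Why << '\n';
  return 0;
}

This is the debuggability win the comment describes: the winning reason is recorded, so a trace can say exactly which heuristic made each pick.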
/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from scheduling standpoint.
@@ -1098,109 +1772,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
// have UnitIncrease==0, so are neutral.
   // Avoid exceeding the target's limit.
- if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
+ DEBUG(dbgs() << "RP excess top - bot: "
+ << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
-
+ }
// Avoid increasing the max critical pressure in the scheduled region.
- if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP critical top - bot: "
+ << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
+ << '\n');
return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
-
+ }
// Avoid increasing the max pressure of the entire region.
- if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP current top - bot: "
+ << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
+ << '\n');
return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
-
+ }
return false;
}
+#ifndef NDEBUG
+const char *ConvergingScheduler::getReasonStr(
+ ConvergingScheduler::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case SingleExcess: return "REG-EXCESS";
+ case SingleCritical: return "REG-CRIT ";
+ case Cluster: return "CLUSTER ";
+ case SingleMax: return "REG-MAX ";
+ case MultiPressure: return "REG-MULTI ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+  }
+ llvm_unreachable("Unknown reason!");
+}
+
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
+ const SchedBoundary &Zone) {
+ const char *Label = getReasonStr(Cand.Reason);
+ PressureElement P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case SingleExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case SingleCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case SingleMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << Label << " " << Zone.Available.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ Cand.SU->dump(DAG);
+}
+#endif
+
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
-ConvergingScheduler::CandResult ConvergingScheduler::
-pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate) {
+void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ ReadyQueue &Q = Zone.Available;
+
DEBUG(Q.dump());
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
- // BestSU remains NULL if no top candidates beat the best existing candidate.
- CandResult FoundCandidate = NoCand;
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- RegPressureDelta RPDelta;
- TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
- DAG->getRegionCriticalPSets(),
- DAG->getRegPressure().MaxSetPressure);
-
- // Initialize the candidate if needed.
- if (!Candidate.SU) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = NodeOrder;
- continue;
- }
- // Avoid exceeding the target's limit.
- if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) {
- DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleExcess;
- continue;
- }
- if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease)
- continue;
- if (FoundCandidate == SingleExcess)
- FoundCandidate = MultiPressure;
-
- // Avoid increasing the max critical pressure in the scheduled region.
- if (RPDelta.CriticalMax.UnitIncrease
- < Candidate.RPDelta.CriticalMax.UnitIncrease) {
- DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleCritical;
- continue;
- }
- if (RPDelta.CriticalMax.UnitIncrease
- > Candidate.RPDelta.CriticalMax.UnitIncrease)
- continue;
- if (FoundCandidate == SingleCritical)
- FoundCandidate = MultiPressure;
-
- // Avoid increasing the max pressure of the entire region.
- if (RPDelta.CurrentMax.UnitIncrease
- < Candidate.RPDelta.CurrentMax.UnitIncrease) {
- DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleMax;
- continue;
- }
- if (RPDelta.CurrentMax.UnitIncrease
- > Candidate.RPDelta.CurrentMax.UnitIncrease)
- continue;
- if (FoundCandidate == SingleMax)
- FoundCandidate = MultiPressure;
-
- // Fall through to original instruction order.
- // Only consider node order if Candidate was chosen from this Q.
- if (FoundCandidate == NoCand)
- continue;
- if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
- || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
- DEBUG(traceCandidate("NCAND", Q, *I));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = NodeOrder;
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = *I;
+ tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand, Zone));
}
}
- return FoundCandidate;
+}
+
+static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+ bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
+ << " SU(" << Cand.SU->NodeNum << ") "
+ << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
/// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
+SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
@@ -1211,11 +1920,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
IsTopNode = true;
return SU;
}
- SchedCandidate BotCand;
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ SchedCandidate TopCand(NoPolicy);
+ checkResourceLimits(TopCand, BotCand);
+
// Prefer bottom scheduling when heuristics are silent.
- CandResult BotResult = pickNodeFromQueue(Bot.Available,
- DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
// If either Q has a single candidate that provides the least increase in
// Excess pressure, we can immediately schedule from that Q.
@@ -1224,37 +1936,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
// affects picking from either Q. If scheduling in one direction must
// increase pressure for one of the excess PSets, then schedule in that
// direction first to provide more freedom in the other direction.
- if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
// Check if the top Q has a better candidate.
- SchedCandidate TopCand;
- CandResult TopResult = pickNodeFromQueue(Top.Available,
- DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
- if (TopResult == SingleExcess || TopResult == SingleCritical) {
- IsTopNode = true;
- return TopCand.SU;
- }
// If either Q has a single candidate that minimizes pressure above the
// original region's pressure pick it.
- if (BotResult == SingleMax) {
+ if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
- if (TopResult == SingleMax) {
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
return TopCand.SU;
}
- // Check for a salient pressure difference and pick the best from either side.
- if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ // Otherwise prefer the bottom candidate, in node order if all else failed.
+ if (TopCand.Reason < BotCand.Reason) {
IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
return TopCand.SU;
}
- // Otherwise prefer the bottom candidate in node order.
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
@@ -1266,33 +1982,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
return NULL;
}
SUnit *SU;
- if (ForceTopDown) {
- SU = Top.pickOnlyChoice();
- if (!SU) {
- SchedCandidate TopCand;
- CandResult TopResult =
- pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
- (void)TopResult;
- SU = TopCand.SU;
+ do {
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
}
- IsTopNode = true;
- }
- else if (ForceBottomUp) {
- SU = Bot.pickOnlyChoice();
- if (!SU) {
- SchedCandidate BotCand;
- CandResult BotResult =
- pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
- (void)BotResult;
- SU = BotCand.SU;
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
}
- IsTopNode = false;
- }
- else {
- SU = pickNodeBidrectional(IsTopNode);
- }
+ else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -1324,13 +2041,142 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
assert((!ForceTopDown || !ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMI(C, new ConvergingScheduler());
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+ // Register DAG post-processors.
+ if (EnableLoadCluster)
+ DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ if (EnableMacroFusion)
+ DAG->addMutation(new MacroFusion(DAG->TII));
+ return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
createConvergingSched);
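The registry above is also the extension point for custom strategies. A sketch of the registration pattern with a hypothetical, compile-only stub strategy; selecting it via -misched=mysched is an assumption based on how these registry names are used:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

namespace {
// Hypothetical strategy with placeholder bodies; a real implementation must
// eventually schedule every node rather than returning NULL from pickNode.
class MyStrategy : public MachineSchedStrategy {
  virtual void initialize(ScheduleDAGMI *DAG) {}
  virtual SUnit *pickNode(bool &IsTopNode) { return NULL; }
  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
  virtual void releaseTopNode(SUnit *SU) {}
  virtual void releaseBottomNode(SUnit *SU) {}
};
} // namespace

static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  // Same shape as createConvergingSched: wrap the strategy in ScheduleDAGMI,
  // optionally calling DAG->addMutation(...) before returning.
  return new ScheduleDAGMI(C, new MyStrategy());
}

static MachineSchedRegistry
MySchedRegistry("mysched", "Hypothetical example scheduler.", createMySched);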
//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ SchedDFSResult *DFSResult;
+ BitVector *ScheduledTrees;
+ bool MaximizeILP;
+
+ ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
+ : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
+ if (MaximizeILP)
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
+ else
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ /// In case all subtrees are eventually connected to a common root through
+ /// data dependence (e.g. reduction), place an upper limit on their size.
+ ///
+ /// FIXME: A subtree limit is generally good, but in the situation commented
+ /// above, where multiple similar subtrees feed a common root, we should
+ /// only split at a point where the resulting subtrees will be balanced.
+ /// (a motivating test case must be found).
+ static const unsigned SubtreeLimit = 16;
+
+ SchedDFSResult DFSResult;
+ BitVector ScheduledTrees;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP)
+ : DFSResult(/*BottomUp=*/true, SubtreeLimit),
+ Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *DAG) {
+ ReadyQ.clear();
+ DFSResult.clear();
+ DFSResult.resize(DAG->SUnits.size());
+ ScheduledTrees.clear();
+ }
+
+ virtual void registerRoots() {
+ DFSResult.compute(ReadyQ);
+ ScheduledTrees.resize(DFSResult.getNumSubtrees());
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ /// Callback to select the highest priority node from the ready Q.
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
+ << *SU->getInstr()
+ << " ILP: " << DFSResult.getILP(SU)
+ << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
+                 << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU)) << '\n');
+ return SU;
+ }
+
+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+ /// DFSResults, and resort the priority Q.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
+ ScheduledTrees.set(DFSResult.getSubtreeID(SU));
+ DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+ }
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
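One subtlety in ILPScheduler above: ILPOrder reads mutable state (ScheduledTrees), so once that state changes the ordering cached in ReadyQ may be stale, which is why registerRoots and schedNode rebuild with std::make_heap rather than trusting incremental push_heap/pop_heap alone. A self-contained illustration of the same discipline:

#include <algorithm>
#include <cassert>
#include <vector>

// A comparator whose answer depends on external state, like ILPOrder's
// dependence on which subtrees have been scheduled.
struct StatefulCmp {
  const bool *Flip;
  bool operator()(int A, int B) const { return *Flip ? A > B : A < B; }
};

int main() {
  bool Flip = false;
  StatefulCmp Cmp = { &Flip };
  std::vector<int> Q;
  Q.push_back(3); Q.push_back(1); Q.push_back(4); Q.push_back(5);
  std::make_heap(Q.begin(), Q.end(), Cmp);

  // External state changed: every cached heap invariant is now suspect,
  // so rebuild the heap wholesale before popping again.
  Flip = true;
  std::make_heap(Q.begin(), Q.end(), Cmp);

  std::pop_heap(Q.begin(), Q.end(), Cmp);
  assert(Q.back() == 1 && "after the flip, the smallest element pops first");
  return 0;
}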
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index bc383cb..4dafbe5 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -18,18 +18,18 @@
#define DEBUG_TYPE "machine-sink"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -49,7 +49,6 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
- BitVector AllocatableSet; // Which physregs are allocatable?
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
@@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 1a3aa60..685ccab 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -9,18 +9,19 @@
#define DEBUG_TYPE "machine-trace-metrics"
#include "MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SparseSet.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -50,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- ItinData = MF->getTarget().getInstrItineraryData();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST =
+ MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
return false;
}
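With SchedModel initialized as above, the hunks below replace raw itinerary lookups with TargetSchedModel queries. A sketch of the resulting latency computation, using the same five-argument computeOperandLatency call this patch adopts (DefMI and UseMI are assumed to come from an existing MachineFunction):

#include "llvm/CodeGen/TargetSchedule.h"
using namespace llvm;

// Compute the def->use latency of one data dependence, the way the
// rewritten MachineTraceMetrics code does.
static unsigned depLatency(const TargetSchedModel &SchedModel,
                           const MachineInstr *DefMI, unsigned DefOp,
                           const MachineInstr *UseMI, unsigned UseOp) {
  // Transient instructions (copies, implicit defs) contribute no latency.
  if (DefMI->isTransient())
    return 0;
  // UseMI may be NULL (e.g. a live-in list entry); the model handles that,
  // as the comment in updatePhysDepsUpwards below notes.
  return SchedModel.computeOperandLatency(DefMI, DefOp, UseMI, UseOp,
                                          /* FindMin = */ false);
}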
@@ -674,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ if (!DefTBI.isEarlierInSameTrace(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
@@ -737,16 +740,15 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ if (!DepTBI.isEarlierInSameTrace(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
- Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp,
- /* FindMin = */ false);
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+ /* FindMin = */ false);
Cycle = std::max(Cycle, DepCycle);
}
// Remember the instruction depth.
@@ -769,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
SparseSet<LiveRegUnit> &RegUnits,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
@@ -792,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
unsigned DepHeight = I->Cycle;
if (!MI->isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
- // live-in list.
- if (I->MI)
- DepHeight += TII->computeOperandLatency(ItinData,
- MI, MO.getOperandNo(),
- I->MI, I->Op);
- else
- // No UseMI. Just use the MI latency instead.
- DepHeight += TII->getInstrLatency(ItinData, MI);
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel
+ .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+ /* FindMin = */ false);
}
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
@@ -832,12 +830,12 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
static bool pushDepHeight(const DataDep &Dep,
const MachineInstr *UseMI, unsigned UseHeight,
MIHeightMap &Heights,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII) {
// Adjust height by Dep.DefMI latency.
if (!Dep.DefMI->isTransient())
- UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp);
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp, false);
// Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I;
@@ -852,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep,
return false;
}
-/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
-/// of all the blocks in Trace. Stop when reaching the block that contains
-/// DefMI.
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
-addLiveIns(const MachineInstr *DefMI,
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(0).getReg();
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -951,8 +949,8 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
if (pushDepHeight(Deps.front(), PHI, Height,
- Heights, MTM.ItinData, MTM.TII))
- addLiveIns(Deps.front().DefMI, Stack);
+ Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
}
}
@@ -980,12 +978,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// There may also be regunit dependencies to include in the height.
if (HasPhysRegs)
Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
- MTM.ItinData, MTM.TII, MTM.TRI);
+ MTM.SchedModel, MTM.TII, MTM.TRI);
// Update the required height of any virtual registers read by MI.
for (unsigned i = 0, e = Deps.size(); i != e; ++i)
- if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
- addLiveIns(Deps[i].DefMI, Stack);
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
InstrCycles &MICycles = Cycles[MI];
MICycles.Height = Cycle;
@@ -1054,10 +1052,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
- Dep.DefMI, Dep.DefOp,
- PHI, Dep.UseOp,
- /* FindMin = */ false);
+ DepCycle += TE.MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
return DepCycle;
}
@@ -1068,9 +1064,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
unsigned Instrs = TBI.InstrDepth;
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
- if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
- if (Model->IssueWidth != 0)
- return Instrs / Model->IssueWidth;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
// Assume issue width 1 without a schedule model.
return Instrs;
}
@@ -1080,9 +1075,8 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
- if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
- if (Model->IssueWidth != 0)
- return Instrs / Model->IssueWidth;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
// Assume issue width 1 without a schedule model.
return Instrs;
}
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
index c5b86f3..460730b 100644
--- a/lib/CodeGen/MachineTraceMetrics.h
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -50,6 +50,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSchedule.h"
namespace llvm {
@@ -67,9 +68,9 @@ class MachineTraceMetrics : public MachineFunctionPass {
const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const InstrItineraryData *ItinData;
const MachineRegisterInfo *MRI;
const MachineLoopInfo *Loops;
+ TargetSchedModel SchedModel;
public:
class Ensemble;
@@ -164,6 +165,14 @@ public:
/// Invalidate height resources when a block below this one has changed.
void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+ /// Determine if this block belongs to the same trace as TBI and comes
+ /// before it in the trace.
+ /// Also returns true when TBI == this.
+ bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
+ return hasValidDepth() && TBI.hasValidDepth() &&
+ Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
+ }
+
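A small concrete check of the new predicate's behavior, with a pared-down stand-in for TraceBlockInfo (Valid models hasValidDepth()):

#include <cassert>

struct TBI {
  unsigned Head;
  unsigned InstrDepth;
  bool Valid;
  bool isEarlierInSameTrace(const TBI &Other) const {
    return Valid && Other.Valid && Head == Other.Head &&
           InstrDepth <= Other.InstrDepth;
  }
};

int main() {
  TBI A = { 3, 4, true };
  TBI B = { 3, 9, true };
  assert(A.isEarlierInSameTrace(B));  // same trace head, A above B
  assert(A.isEarlierInSameTrace(A));  // reflexive, as documented
  TBI C = { 7, 1, true };
  assert(!A.isEarlierInSameTrace(C)); // different trace head
  return 0;
}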
// Data-dependency-related information. Per-instruction depth and height
// are computed from data dependencies in the current trace, using
// itinerary data.
@@ -270,7 +279,7 @@ public:
unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
void computeInstrDepths(const MachineBasicBlock*);
void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI,
+ void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace);
protected:
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 181e09e..4b12300 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,28 +23,28 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "llvm/BasicBlock.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -80,7 +80,6 @@ namespace {
BlockSet FunctionBlocks;
BitVector regsReserved;
- BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
@@ -186,7 +185,7 @@ namespace {
}
bool isAllocatable(unsigned Reg) {
- return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
}
// Analysis information if available
@@ -308,6 +307,9 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBasicBlockBefore(MFI);
// Keep track of the current bundle header.
const MachineInstr *CurBundle = 0;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
@@ -315,6 +317,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
*OS << "Instruction: " << *MBBI;
continue;
}
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MBBI->isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor", MBBI);
+ if (!InBundle && MBBI->isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor", MBBI);
+
// Is this a bundle header?
if (!MBBI->isInsideBundle()) {
if (CurBundle)
@@ -327,9 +338,14 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
visitMachineInstrAfter(MBBI);
+
+ // Was this the last bundled instruction?
+ InBundle = MBBI->isBundledWithSucc();
}
if (CurBundle)
visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MFI->back());
visitMachineBasicBlockAfter(MFI);
}
visitMachineFunctionAfter();
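The new bundle check enforces a local invariant: every instruction's BundledPred flag must match the previous instruction's BundledSucc flag, and a block must not end mid-bundle. A toy model of the same scan, with (pred, succ) flag pairs standing in for MachineInstrs:

#include <cassert>
#include <utility>
#include <vector>

// (BundledWithPred, BundledWithSucc) per instruction, in block order.
typedef std::pair<bool, bool> Flags;

// Mirrors the MachineVerifier scan: InBundle tracks what the previous
// instruction promised about its successor.
static bool bundleFlagsConsistent(const std::vector<Flags> &Block) {
  bool InBundle = false;
  for (unsigned i = 0, e = Block.size(); i != e; ++i) {
    if (InBundle != Block[i].first)
      return false;            // pred/succ flags disagree across the boundary
    InBundle = Block[i].second; // what this instruction promises about the next
  }
  return !InBundle;            // a block must not end inside a bundle
}

int main() {
  // BUNDLE header + two bundled instructions, then a normal instruction.
  std::vector<Flags> OK;
  OK.push_back(Flags(false, true));
  OK.push_back(Flags(true, true));
  OK.push_back(Flags(true, false));
  OK.push_back(Flags(false, false));
  assert(bundleFlagsConsistent(OK));

  // BundledSucc set on the last instruction: the block ends mid-bundle.
  std::vector<Flags> Bad;
  Bad.push_back(Flags(false, true));
  Bad.push_back(Flags(true, true));
  assert(!bundleFlagsConsistent(Bad));
  return 0;
}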
@@ -427,7 +443,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
- regsReserved = TRI->getReservedRegs(*MF);
+ regsReserved = MRI->getReservedRegs();
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
@@ -439,8 +455,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
}
}
- regsAllocatable = TRI->getAllocatableSet(*MF);
-
markReachable(&MF->front());
// Build a set of the basic blocks in the function.
@@ -583,7 +597,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() == 1) {
+ } else if (MBB->succ_size() == 1) {
// A conditional branch with only one successor is weird, but allowed.
if (&*MBBI != TBB)
report("MBB exits via conditional branch/fall-through but only has "
@@ -710,8 +724,9 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
report("Asm string must be an external symbol", MI);
if (!MI->getOperand(1).isImm())
report("Asm flags must be an immediate", MI);
- // Allowed flags are Extra_HasSideEffects = 1, and Extra_IsAlignStack = 2.
- if (!isUInt<2>(MI->getOperand(1).getImm()))
+ // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+ // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
+ if (!isUInt<5>(MI->getOperand(1).getImm()))
report("Unknown asm flags", &MI->getOperand(1), 1);
assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
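A quick worked example of why the check widens to isUInt<5>: the five documented extra flags occupy bits 0 through 4, so every legal combination is below 32. Simplified names here; the authoritative Extra_* constants live in LLVM's InlineAsm header:

#include <cassert>
#include <stdint.h>

// Bit values from the comment above (illustration only).
enum ExtraFlag {
  HasSideEffects = 1,
  IsAlignStack   = 2,
  AsmDialect     = 4,
  MayLoad        = 8,
  MayStore       = 16
};

static bool isUInt5(uint64_t x) { return x < (1u << 5); } // isUInt<5> analog

int main() {
  uint32_t F = HasSideEffects | MayLoad | MayStore; // 1 + 8 + 16 = 25
  assert(isUInt5(F));        // 25 < 32: accepted by the verifier
  assert(!isUInt5(F | 32));  // an unknown sixth bit would be rejected
  return 0;
}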
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 6da313e..3982612 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -14,13 +14,13 @@
#define DEBUG_TYPE "phi-opt"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index e6e23da..4daa211 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,23 +14,23 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/Passes.h"
#include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 10bfdcc..e1b56e9 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index a6dd5de..2a135bc 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,21 +12,22 @@
//
//===---------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -49,8 +50,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
- cl::desc("Enable Early If-conversion"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -161,7 +162,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
return applyDisable(TargetID, DisableMachineDCE);
if (StandardID == &EarlyIfConverterID)
- return applyDisable(TargetID, !EnableEarlyIfConversion);
+ return applyDisable(TargetID, DisableEarlyIfConversion);
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -241,7 +242,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
disablePass(&EarlyIfConverterID);
// Temporarily disable experimental passes.
- substitutePass(&MachineSchedulerID, 0);
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enableMachineScheduler())
+ disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
@@ -359,7 +362,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass());
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -397,12 +400,16 @@ void TargetPassConfig::addPassesToHandleExceptions() {
}
}
-/// Add common passes that perform LLVM IR to IR transforms in preparation for
-/// instruction selection.
-void TargetPassConfig::addISelPrepare() {
+/// Add a pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
addPass(createCodeGenPreparePass(getTargetLowering()));
+}
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
addPass(createStackProtectorPass(getTargetLowering()));
addPreISel();
@@ -462,8 +469,7 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
- }
- else {
+ } else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
addPass(&LocalStackSlotAllocationID);
@@ -507,9 +513,10 @@ void TargetPassConfig::addMachinePasses() {
}
// GC
- addPass(&GCMachineCodeAnalysisID);
- if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()));
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()));
+ }
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
@@ -726,6 +733,12 @@ void TargetPassConfig::addMachineLateOptimization() {
printAndVerify("After copy propagation pass");
}
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID);
+ return true;
+}
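Turning addGCPasses into an overridable hook lets a target opt out of GC machine-code analysis entirely. A hypothetical override, sketched against the TargetPassConfig constructor signature shown earlier in this patch:

#include "llvm/CodeGen/Passes.h"
using namespace llvm;

// Hypothetical target with no garbage-collected frames: skipping the base
// implementation omits GCMachineCodeAnalysisID, and returning false also
// suppresses the GC info printer at the call site in addMachinePasses.
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}
  virtual bool addGCPasses() { return false; }
};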
+
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
AnalysisID PassID = 0;
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862..a7439b5 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -49,20 +49,26 @@
// v1 = bitcast v0
// = v0
//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads into a virtual register and that virtual register has
+// a single use.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
// Optimize Extensions
@@ -473,6 +479,9 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
if (DisablePeephole)
return false;
@@ -527,6 +536,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SeenMoveImm = true;
} else {
Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
if (SeenMoveImm)
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
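The MII adjustment above is an instance of a general rule: when a transformation may insert elements between the current position and an already-advanced iterator, re-derive the iterator from the current element. A container-level sketch of the bug being avoided:

#include <cassert>
#include <list>

int main() {
  std::list<int> L;
  L.push_back(1); L.push_back(2); L.push_back(3);
  std::list<int>::iterator I = L.begin();    // "MI": current element
  std::list<int>::iterator Next = I; ++Next; // "MII": pre-incremented cursor

  // A transformation inserts new work right after the current element.
  L.insert(Next, 10); // list is now 1, 10, 2, 3; Next still points at 2

  // Re-anchor: without this, the loop would resume at 2 and skip 10.
  Next = I;
  ++Next;
  assert(*Next == 10);
  return 0;
}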
@@ -542,6 +556,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
FoldAsLoadDefReg, DefMI);
if (FoldMI) {
// Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 6090752..488f1d4 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,32 +19,33 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
-#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/Passes.h"
#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@@ -111,9 +112,6 @@ namespace {
/// added to the AvailableQueue.
std::vector<SUnit*> PendingQueue;
- /// Topo - A topological ordering for SUnits.
- ScheduleDAGTopologicalSort Topo;
-
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
@@ -198,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList(
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
@@ -240,7 +238,7 @@ void SchedulePostRATDList::exitRegion() {
ScheduleDAGInstrs::exitRegion();
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -467,13 +465,10 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
- MI->addOperand(MachineOperand::CreateReg(*SubRegs,
- true /*IsDef*/,
- true /*IsImp*/,
- false /*IsKill*/,
- false /*IsDead*/));
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
AllDead = false;
}
}
@@ -490,7 +485,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
BitVector killedRegs(TRI->getNumRegs());
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -531,7 +525,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
bool kill = false;
if (!killedRegs.test(Reg)) {
@@ -566,7 +560,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
LiveRegs.set(Reg);
@@ -581,10 +575,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -654,8 +652,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
- bool available = SUnits[i].Preds.empty();
- if (available) {
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index c791ffb..1d0e71e 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,25 +21,25 @@
#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -96,7 +96,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
@@ -111,7 +112,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -133,19 +135,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-#if 0
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- if (ShrinkWrapping || ShrinkWrapFunc != "") {
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- }
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-#endif
-
/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
@@ -204,13 +193,13 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
-void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
+ const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
// Get the callee saved register list...
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
@@ -221,13 +210,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return;
// In Naked functions we aren't going to save any registers.
- if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (F.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
}
@@ -248,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
int FrameIdx;
- if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
I->setFrameIdx(FrameIdx);
continue;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 0d140a9..87fff9a 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -22,11 +22,11 @@
#ifndef LLVM_CODEGEN_PEI_H
#define LLVM_CODEGEN_PEI_H
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 49599b3..8564911 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,14 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 993dbc7..c035590 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
-#include "LiveRegMatrix.h"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#ifndef NDEBUG
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index db0c8e1..064e40f 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,9 +37,9 @@
#ifndef LLVM_CODEGEN_REGALLOCBASE
#define LLVM_CODEGEN_REGALLOCBASE
-#include "LiveIntervalUnion.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
namespace llvm {
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 8a49609..3053119 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,30 +13,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
-#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
#include "Spiller.h"
-#include "VirtRegMap.h"
-#include "LiveRegMatrix.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
#include <cstdlib>
#include <queue>
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index f573d41..8d849dc 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,28 +13,28 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -113,9 +113,11 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
- // UsedInInstr - BitVector of physregs that are used in the current
- // instruction, and so cannot be allocated.
- BitVector UsedInInstr;
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // UsedInInstr - Set of physregs that are used in the current instruction,
+ // and so cannot be allocated.
+ UsedInInstrSet UsedInInstr;
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
@@ -173,7 +175,7 @@ namespace {
unsigned VirtReg, unsigned Hint);
LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
- void spillAll(MachineInstr *MI);
+ void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
void addRetOperands(MachineBasicBlock *MBB);
};
@@ -312,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
}
/// spillAll - Spill all dirty virtregs without killing them.
-void RAFast::spillAll(MachineInstr *MI) {
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
if (LiveVirtRegs.empty()) return;
isBulkSpilling = true;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.test(PhysReg)) {
+ if (UsedInInstr.count(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.test(Alias))
+ if (UsedInInstr.count(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -509,7 +511,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// Ignore invalid hints.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
Hint = 0;
// Take hint when possible.
@@ -525,12 +527,12 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
}
- ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
// First try to find a completely free register.
- for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -540,7 +542,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
<< RC->getName() << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
- for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
unsigned Cost = calcSpillCost(*I);
DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
@@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.reset();
+ UsedInInstr.clear();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
@@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.set(Reg);
+ UsedInInstr.insert(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.set(PartialDefs[i]);
+ UsedInInstr.insert(PartialDefs[i]);
}
/// addRetOperand - ensure that a return instruction has an operand for each
@@ -820,10 +822,8 @@ void RAFast::addRetOperands(MachineBasicBlock *MBB) {
}
}
if (!Found)
- MI->addOperand(MachineOperand::CreateReg(Reg,
- false /*IsDef*/,
- true /*IsImp*/,
- hasDef/*IsKill*/));
+ MachineInstrBuilder(*MF, MI)
+ .addReg(Reg, llvm::RegState::Implicit | getKillRegState(hasDef));
}
}
@@ -838,7 +838,7 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- if (RegClassInfo.isAllocatable(*I))
+ if (MRI->isAllocatable(*I))
definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
@@ -942,7 +942,7 @@ void RAFast::AllocateBasicBlock() {
}
// Track registers used by instruction.
- UsedInInstr.reset();
+ UsedInInstr.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
@@ -954,6 +954,11 @@ void RAFast::AllocateBasicBlock() {
bool hasPhysDefs = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!Reg) continue;
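The regmask handling added in this hunk relies on the operand's bit layout: one bit per physical register, where a set bit means the register is preserved across the call and a clear bit means it is clobbered. A minimal sketch of that test (the standalone helper below is illustrative; it mirrors what MachineOperand::clobbersPhysReg computes):

    #include <cstdint>

    // One bit per physreg; a SET bit means "preserved across the call".
    // A register is clobbered when its bit is clear.
    static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
      return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
    }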
@@ -970,7 +975,7 @@ void RAFast::AllocateBasicBlock() {
}
continue;
}
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
@@ -1016,11 +1021,13 @@ void RAFast::AllocateBasicBlock() {
}
}
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
- UsedInInstr.reset();
+ UsedInInstr.clear();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -1030,7 +1037,7 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
}
}
@@ -1058,7 +1065,7 @@ void RAFast::AllocateBasicBlock() {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
regFree : regReserved);
continue;
@@ -1080,7 +1087,9 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1118,7 +1127,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TII = TM->getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
- UsedInInstr.resize(TRI->getNumRegs());
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegs());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
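The BitVector-to-SparseSet switch above changes the clearing cost: resetting a BitVector touches every word of the physreg universe, while clearing a sparse set is constant time, and UsedInInstr is cleared once per instruction but rarely holds more than a few registers. A self-contained sketch of the underlying technique (the classic sparse-set idea that llvm/ADT/SparseSet.h implements; the class below is illustrative, not LLVM's):

    #include <cstdint>
    #include <vector>

    // Sparse set over a fixed universe [0, N): O(1) insert/count/clear.
    // Sparse[] may hold stale indices; count() validates them against Dense.
    class SparseSetSketch {
      std::vector<uint32_t> Sparse; // Sparse[Key] = candidate index into Dense.
      std::vector<uint32_t> Dense;  // Keys currently in the set.
    public:
      void setUniverse(uint32_t N) { Sparse.assign(N, 0); Dense.clear(); }
      bool count(uint32_t Key) const {
        uint32_t I = Sparse[Key];
        return I < Dense.size() && Dense[I] == Key;
      }
      void insert(uint32_t Key) {
        if (count(Key)) return;
        Sparse[Key] = Dense.size();
        Dense.push_back(Key);
      }
      void clear() { Dense.clear(); } // Stale Sparse entries are harmless.
    };

setUniverse() here corresponds to the UsedInInstr.setUniverse(TRI->getNumRegs()) call added in runOnMachineFunction() above.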
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index c021a93..1884452 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -13,36 +13,35 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
-#include "LiveRegMatrix.h"
#include "RegAllocBase.h"
-#include "Spiller.h"
#include "SpillPlacement.h"
+#include "Spiller.h"
#include "SplitKit.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
-
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
#include <queue>
using namespace llvm;
@@ -330,9 +329,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
+ AU.addRequired<CalculateSpillWeights>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -414,7 +413,7 @@ void RAGreedy::enqueue(LiveInterval *LI) {
Prio = (1u << 31) + Size;
// Boost ranges that have a physical register hint.
- if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+ if (VRM->hasKnownPreference(Reg))
Prio |= (1u << 30);
}
@@ -443,7 +442,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
while ((PhysReg = Order.next()))
if (!Matrix->checkInterference(VirtReg, PhysReg))
break;
- if (!PhysReg || Order.isHint(PhysReg))
+ if (!PhysReg || Order.isHint())
return PhysReg;
// PhysReg is available, but there may be a better choice.
@@ -508,7 +507,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
-/// @prarm IsHint True when PhysReg is VirtReg's preferred register.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
@@ -662,7 +661,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
BestPhys = PhysReg;
// Stop if the hint can be used.
- if (Order.isHint(PhysReg))
+ if (Order.isHint())
break;
}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index fcdbce7..607edac 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,24 +31,24 @@
#define DEBUG_TYPE "regalloc"
-#include "Spiller.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
-#include "llvm/Module.h"
+#include "Spiller.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -118,7 +118,6 @@ private:
typedef std::vector<AllowedSet> AllowedSetMap;
typedef std::pair<unsigned, unsigned> RegPair;
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
typedef std::set<unsigned> RegSet;
@@ -208,8 +207,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
mri->setPhysRegUsed(Reg);
}
- BitVector reservedRegs = tri->getReservedRegs(*mf);
-
// Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
@@ -218,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
LiveInterval *vregLI = &LIS->getInterval(vreg);
// Record any overlaps with regmask operands.
- BitVector regMaskOverlaps(tri->getNumRegs());
+ BitVector regMaskOverlaps;
LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
// Compute an initial allowed set for the current vreg.
@@ -227,7 +224,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (reservedRegs.test(preg))
+ if (mri->isReserved(preg))
continue;
// vregLI crosses a regmask operand that clobbers preg.
@@ -357,7 +354,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
loopInfo->getLoopDepth(mbb));
if (cp.isPhys()) {
- if (!lis->isAllocatable(dst)) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
continue;
}
@@ -432,6 +429,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
if (customPassID)
au.addRequiredID(*customPassID);
@@ -443,6 +441,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(au);
}
@@ -527,7 +526,7 @@ void RegAllocPBQP::finalizeAlloc() const {
itr != end; ++itr) {
LiveInterval *li = &lis->getInterval(*itr);
- unsigned physReg = vrm->getRegAllocPref(li->reg);
+ unsigned physReg = mri->getSimpleHint(li->reg);
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 652bc30..078a0df 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,12 +15,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -43,7 +44,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
- const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
// overlapping CSR.
@@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CalleeSaved = CSR;
// Different reserved registers?
- BitVector RR = TRI->getReservedRegs(*MF);
- if (RR != Reserved)
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
Update = true;
- Reserved = RR;
+ Reserved = RR;
+ }
// Invalidate cached information from previous function.
if (Update)
@@ -77,14 +79,14 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned NumRegs = RC->getNumRegs();
if (!RCI.Order)
- RCI.Order.reset(new unsigned[NumRegs]);
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
unsigned N = 0;
- SmallVector<unsigned, 16> CSRAlias;
+ SmallVector<MCPhysReg, 16> CSRAlias;
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
for (unsigned i = 0; i != RawOrder.size(); ++i) {
unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index d018835..36d8101 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,35 +16,31 @@
#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -55,12 +51,25 @@ STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
static cl::opt<bool>
EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
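The cl::boolOrDefault type gives the new -join-globalcopies flag three states, so an unset flag can defer to a subtarget default rather than forcing false. A hedged sketch of how such a tri-state is typically resolved (the resolve() helper is illustrative; the actual decision presumably happens in runOnMachineFunction, outside this hunk):

    #include <cassert>

    // Sketch of the tri-state pattern behind cl::boolOrDefault: BOU_UNSET
    // means the flag never appeared on the command line.
    enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

    static bool resolve(BoolOrDefault Opt, bool SubtargetDefault) {
      if (Opt == BOU_UNSET)
        return SubtargetDefault; // defer to the subtarget
      return Opt == BOU_TRUE;    // explicit override wins
    }

    int main() {
      assert(resolve(BOU_UNSET, true));
      assert(!resolve(BOU_FALSE, true));
    }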
static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
@@ -80,8 +89,17 @@ namespace {
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
/// WorkList - Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
/// ErasedInstrs - Set of instruction pointers that have been erased, and
/// that may be present in WorkList.
@@ -99,6 +117,9 @@ namespace {
/// LiveRangeEdit callback.
void LRE_WillEraseInstruction(MachineInstr *MI);
+ /// coalesceLocals - coalesce the LocalWorkList.
+ void coalesceLocals();
+
/// joinAllIntervals - join compatible live intervals
void joinAllIntervals();
@@ -106,9 +127,9 @@ namespace {
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
- /// position From. Return true if any progress was made.
- bool copyCoalesceWorkList(unsigned From = 0);
+ /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// true if any progress was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
@@ -123,6 +144,9 @@ namespace {
/// can use this information below to update aliases.
bool joinIntervals(CoalescerPair &CP);
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
@@ -149,7 +173,7 @@ namespace {
MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
- bool canJoinPhys(CoalescerPair &CP);
+ bool canJoinPhys(const CoalescerPair &CP);
/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
@@ -193,12 +217,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
char RegisterCoalescer::ID = 0;
-static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
- if (!a) return b;
- if (!b) return a;
- return tri.composeSubRegIndices(a, b);
-}
-
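The deleted compose() wrapper existed only to treat the null sub-register index as an identity element; the direct calls to TRI.composeSubRegIndices() below suggest the target-independent API now performs the same short-circuit itself. The pattern as a standalone sketch (the table contents are made up purely for illustration; real composition tables are generated per target by TableGen):

    #include <map>
    #include <utility>

    // Sub-register index 0 means "no sub-register" and acts as the identity,
    // so composing with it returns the other index unchanged.
    static unsigned composeSubRegSketch(unsigned A, unsigned B) {
      static const std::map<std::pair<unsigned, unsigned>, unsigned> Table = {
        {{1, 2}, 3}, // hypothetical pair, illustrative only
      };
      if (!A) return B;
      if (!B) return A;
      auto It = Table.find({A, B});
      return It == Table.end() ? 0 : It->second; // 0 = no valid composition
    }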
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
unsigned &Src, unsigned &Dst,
unsigned &SrcSub, unsigned &DstSub) {
@@ -209,8 +227,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
SrcSub = MI->getOperand(1).getSubReg();
} else if (MI->isSubregToReg()) {
Dst = MI->getOperand(0).getReg();
- DstSub = compose(tri, MI->getOperand(0).getSubReg(),
- MI->getOperand(3).getImm());
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
} else
@@ -218,6 +236,23 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
return true;
}
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies. Simple blocks that
+// contain non-branches should also be vacated, but this can be handled by an
+// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
SrcReg = DstReg = 0;
SrcIdx = DstIdx = 0;
@@ -349,7 +384,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
}
}
@@ -425,7 +461,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
- if (!CP.isCoalescable(ACopyMI))
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
return false;
// Get the LiveRange in IntB that this value number starts with.
@@ -583,7 +620,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !NewDstMO.isKill())
+ if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
return false;
// Make sure there are no other definitions of IntB that would reach the
@@ -849,8 +886,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// Update LiveDebugVariables.
LDV->renameRegister(SrcReg, DstReg, SubIdx);
+ SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
@@ -886,11 +932,11 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
/// canJoinPhys - Return true if a copy involving a physreg should be joined.
-bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
@@ -1065,7 +1111,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
- assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
<< '\n');
@@ -1102,389 +1148,898 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return true;
}
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the input two
-/// ranges will be. This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve. InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
- SmallVector<VNInfo*, 16> &NewVNInfo,
- DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
- DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
- SmallVector<int, 16> &ThisValNoAssignments,
- SmallVector<int, 16> &OtherValNoAssignments) {
- unsigned VN = VNI->id;
-
- // If the VN has already been computed, just return it.
- if (ThisValNoAssignments[VN] >= 0)
- return ThisValNoAssignments[VN];
- assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
- // If this val is not a copy from the other val, then it must be a new value
- // number in the destination.
- DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
- if (I == ThisFromOther.end()) {
- NewVNInfo.push_back(VNI);
- return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
- }
- VNInfo *OtherValNo = I->second;
-
- // Otherwise, this *is* a copy from the RHS. If the other side has already
- // been computed, return it.
- if (OtherValNoAssignments[OtherValNo->id] >= 0)
- return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
- // Mark this value number as currently being computed, then ask what the
- // ultimate value # of the other value is.
- ThisValNoAssignments[VN] = -2;
- unsigned UltimateVN =
- ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
- OtherValNoAssignments, ThisValNoAssignments);
- return ThisValNoAssignments[VN] = UltimateVN;
-}
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+//      range as well as a mapping from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
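The lane reasoning in cases 4 and 5 reduces to bitwise tests on per-lane masks, one bit per sub-register lane of the joined register. A self-contained sketch of the central test (struct and helper names are local to the sketch; in this revision the masks are plain unsigned values):

    #include <cassert>
    #include <cstdio>

    // WriteLanes = lanes a def writes; ValidLanes = lanes holding live values.
    struct ValSketch {
      unsigned WriteLanes;
      unsigned ValidLanes;
    };

    // Case 4 above: joining is safe when the def writes only lanes that are
    // undef on the other side.
    static bool clobbersOnlyUndefLanes(const ValSketch &V,
                                       const ValSketch &Other) {
      return (V.WriteLanes & Other.ValidLanes) == 0;
    }

    int main() {
      ValSketch Src = {0x2, 0x2}; // writes/holds ssub1 only (bit 1)
      ValSketch Dst = {0x1, 0x1}; // FOO defined ssub0 only (bit 0)
      assert(clobbersOnlyUndefLanes(Src, Dst));  // %dst:ssub1 = COPY %src ok
      Dst.ValidLanes = 0x3;                      // both lanes now live
      assert(!clobbersOnlyUndefLanes(Src, Dst)); // real interference
      std::puts("lane checks ok");
    }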
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+ LiveInterval &LI;
+
+ // Location of this register in the final joined register.
+ // Either CP.DstIdx or CP.SrcIdx.
+ unsigned SubIdx;
+
+ // Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+ // This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ // Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ // No overlap, simply keep this value.
+ CR_Keep,
+
+ // Merge this value into OtherVNI and erase the defining instruction.
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ // values.
+ CR_Erase,
+
+ // Merge this value into OtherVNI but keep the defining instruction.
+ // This is for the special case where OtherVNI is defined by the same
+ // instruction.
+ CR_Merge,
+
+ // Keep this value, and have it replace OtherVNI where possible. This
+ // complicates value mapping since OtherVNI maps to two different values
+ // before and after this def.
+ // Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ // Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ // Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
-// Find out if we have something like
-// A = X
-// B = X
-// if so, we can pretend this is actually
-// A = X
-// B = A
-// which allows us to coalesce A and B.
-// VNI is the definition of B. LR is the life range of A that includes
-// the slot just before B. If we return true, we add "B = X" to DupCopies.
-// This implies that A dominates B.
-static bool RegistersDefinedFromSameValue(LiveIntervals &li,
- const TargetRegisterInfo &tri,
- CoalescerPair &CP,
- VNInfo *VNI,
- VNInfo *OtherVNI,
- SmallVector<MachineInstr*, 8> &DupCopies) {
- // FIXME: This is very conservative. For example, we don't handle
- // physical registers.
-
- MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
-
- if (!MI || CP.isPartial() || CP.isPhys())
- return false;
+ // Per-value info for LI. The lane bit masks are all relative to the final
+ // joined register, so they can be compared directly between SrcReg and
+ // DstReg.
+ struct Val {
+ ConflictResolution Resolution;
+
+ // Lanes written by this def, 0 for unanalyzed values.
+ unsigned WriteLanes;
+
+ // Lanes with defined values in this register. Other lanes are undef and
+ // safe to clobber.
+ unsigned ValidLanes;
+
+ // Value in LI being redefined by this def.
+ VNInfo *RedefVNI;
+
+ // Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI;
+
+ // Is this value an IMPLICIT_DEF that can be erased?
+ //
+ // IMPLICIT_DEF values should only exist at the end of a basic block that
+ // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ // safely erased if they are overlapping a live value in the other live
+ // interval.
+ //
+ // Weird control flow graphs and incomplete PHI handling in
+ // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ // longer live ranges. Such IMPLICIT_DEF values should be treated like
+ // normal values.
+ bool ErasableImplicitDef;
+
+ // True when the live range of this value will be pruned because of an
+ // overlapping CR_Replace value in the other live range.
+ bool Pruned;
+
+ // True once Pruned above has been computed.
+ bool PrunedComputed;
+
+ Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
+
+ bool isAnalyzed() const { return WriteLanes != 0; }
+ };
- unsigned A = CP.getDstReg();
- if (!TargetRegisterInfo::isVirtualRegister(A))
- return false;
+ // One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+ VNInfo *stripCopies(VNInfo *VNI);
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+ bool taintExtent(unsigned, unsigned, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveInterval &li, unsigned subIdx,
+ SmallVectorImpl<VNInfo*> &newVNInfo,
+ const CoalescerPair &cp,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri)
+ : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+ Indexes(LIS->getSlotIndexes()), TRI(tri),
+ Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+ {}
+
+ /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LI where they would conflict with
+ /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs);
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+ unsigned L = 0;
+ for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+ if (MO->readsReg())
+ Redef = true;
+ }
+ return L;
+}
- unsigned B = CP.getSrcReg();
- if (!TargetRegisterInfo::isVirtualRegister(B))
- return false;
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+ while (!VNI->isPHIDef()) {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ break;
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ break;
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+ if (!LRQ.valueIn())
+ break;
+ VNI = LRQ.valueIn();
+ }
+ return VNI;
+}
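stripCopies() walks full-copy chains back to the originating value so that two registers copied from the same source compare equal (the CR_Erase case "%this = COPY %ext" in analyzeValue() below). The chain walk in isolation (ValueSketch is illustrative; the real walk also stops at PHI defs, partial copies, and non-virtual sources):

    #include <cassert>

    // Each value optionally records the value it was fully copied from.
    struct ValueSketch {
      const ValueSketch *CopyOf; // null if not defined by a full virtual copy
    };

    static const ValueSketch *stripCopies(const ValueSketch *V) {
      while (V->CopyOf)
        V = V->CopyOf;
      return V;
    }

    int main() {
      ValueSketch Ext = {nullptr};
      ValueSketch A = {&Ext}; // %src = COPY %ext
      ValueSketch B = {&Ext}; // %dst = COPY %ext
      assert(stripCopies(&A) == stripCopies(&B)); // same ultimate value
    }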
- MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
- if (!OtherMI)
- return false;
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = ~0u;
+ return CR_Keep;
+ }
- if (MI->isImplicitDef()) {
- DupCopies.push_back(MI);
- return true;
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = 0;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
} else {
- if (!MI->isFullCopy())
- return false;
- unsigned Src = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Src))
- return false;
- if (!OtherMI->isFullCopy())
- return false;
- unsigned OtherSrc = OtherMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
- return false;
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+ assert(V.RedefVNI && "Instruction is reading nonexistent value");
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
- if (Src != OtherSrc)
- return false;
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
+ }
- // If the copies use two different value numbers of X, we cannot merge
- // A and B.
- LiveInterval &SrcInt = li.getInterval(Src);
- // getVNInfoBefore returns NULL for undef copies. In this case, the
- // optimization is still safe.
- if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
- SrcInt.getVNInfoBefore(VNI->def))
- return false;
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+  // predecessor; the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
- DupCopies.push_back(MI);
- return true;
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+  // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
}
-}
-/// joinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
-bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
- // Handle physreg joins separately.
- if (CP.isPhys())
- return joinReservedPhysReg(CP);
+ // Allow overlapping PHI values. Any real interference would show up in a
+  // predecessor; the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
- DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
- << '\n');
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ stripCopies(VNI) == stripCopies(V.OtherVNI))
+ return CR_Erase;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM %src<kill>
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // This case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
- // Compute the final value assignment, assuming that the live ranges can be
- // coalesced.
- SmallVector<int, 16> LHSValNoAssignments;
- SmallVector<int, 16> RHSValNoAssignments;
- DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
- DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
- SmallVector<VNInfo*, 16> NewVNInfo;
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LI wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ return CR_Impossible;
+
+ // We need to verify that no instructions are reading the clobbered lanes. To
+ // save compile time, we'll only check that locally. Don't allow the tainted
+ // value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+  // being read; for example, OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
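analyzeValue() and computeAssignment() recurse only toward values whose defs are higher in the dominator tree, and computeAssignment() memoizes through Val::isAnalyzed(), so the recursion terminates without an explicit visited set. The control pattern, abstracted into a runnable sketch (NodeSketch and its single-dependency model are simplifications of the OtherVNI/RedefVNI links):

    #include <cassert>
    #include <vector>

    // Each node depends on at most one node strictly earlier in a topological
    // (dominance) order, so recursion cannot cycle and runs once per node.
    struct NodeSketch {
      int Dep = -1;        // dominating node this one merges into, or -1
      int Assignment = -1; // -1 = not yet computed
    };

    static void computeAssignment(std::vector<NodeSketch> &Nodes, int N,
                                  int &NextId) {
      NodeSketch &Node = Nodes[N];
      if (Node.Assignment != -1)
        return; // memoized, like Val::isAnalyzed()
      if (Node.Dep != -1) {
        computeAssignment(Nodes, Node.Dep, NextId);   // recurse upwards only
        Node.Assignment = Nodes[Node.Dep].Assignment; // merge with dominator
      } else {
        Node.Assignment = NextId++; // fresh entry, like NewVNInfo.push_back()
      }
    }

    int main() {
      std::vector<NodeSketch> Nodes(3);
      Nodes[2].Dep = 0; // value 2 is a copy of value 0
      int NextId = 0;
      for (int i = 0; i != 3; ++i)
        computeAssignment(Nodes, i, NextId);
      assert(Nodes[2].Assignment == Nodes[0].Assignment);
      assert(NextId == 2); // 0 and 1 got fresh ids, 2 merged into 0
    }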
- SmallVector<MachineInstr*, 8> DupCopies;
- SmallVector<MachineInstr*, 8> DeadCopies;
+/// Compute the value assignment for ValNo in LI.
+/// This may be called recursively by analyzeValue(), but never for a ValNo on
+/// the stack.
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
+ << LI.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved:
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Other.Vals[V.OtherVNI->id].Pruned = true;
+ // Fall through.
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+ break;
+ }
+}
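+// Illustration with hypothetical value numbers: joining %a = {a0, a1} with
+// %b = {b0}, where a1 is a CR_Erase copy of b0, yields NewVNInfo = [a0, b0]
+// and Assignments a0 -> 0, a1 -> 1, b0 -> 1.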
- LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
- DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
- << '\n');
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ return false;
+ }
+ }
+ return true;
+}
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->isPHIDef())
- continue;
- MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
- if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
- continue;
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+/// 1 %dst = VLOAD <-- Define all lanes in %dst
+/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintExtent.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LI from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+ assert(OtherI != Other.LI.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start
+ << " to " << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes);
+ return true;
+}
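+// A single conflict can produce several TaintExtent entries: each partial
+// redef of Other.LI inside the block terminates one entry and, if tainted
+// lanes remain, starts the next with the narrowed lane mask.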
- // Figure out the value # from the RHS.
- VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
- // The copy could be to an aliased physreg.
- if (!OtherVNI)
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+ unsigned Lanes) {
+ if (MI->isDebugValue())
+ return false;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
continue;
-
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (CP.isCoalescable(MI))
- DeadCopies.push_back(MI);
- else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
- DupCopies))
+ if (!MO->readsReg())
continue;
-
- LHSValsDefinedFromRHS[VNI] = OtherVNI;
+ if (Lanes & TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+ return true;
}
+ return false;
+}
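+// SubIdx is the position of Reg within the coalesced register, so a use of
+// Reg:sub is tested against the lanes of composeSubRegIndices(SubIdx, sub)
+// rather than the lanes of sub alone.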
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->isPHIDef())
- continue;
- MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
- if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
continue;
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LI.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
- // Figure out the value # from the LHS.
- VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
- // The copy could be to an aliased physreg.
- if (!OtherVNI)
- continue;
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
- if (CP.isCoalescable(MI))
- DeadCopies.push_back(MI);
- else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
- DupCopies))
- continue;
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+    for (;;) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+ DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ return false;
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
+ }
- RHSValsDefinedFromLHS[VNI] = OtherVNI;
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
}
+ return true;
+}
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+// %dst = COPY %src
+// %src = COPY %dst <-- This value to be pruned.
+// %dst = COPY %src <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
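+// In the example above, the last COPY maps onto the pruned middle value via
+// CR_Erase, so the recursion follows OtherVNI one step and finds Pruned set.
+// Setting PrunedComputed first keeps cycles of mutual copies terminating.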
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- ComputeUltimateVN(VNI, NewVNInfo,
- LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
- LHSValNoAssignments, RHSValNoAssignments);
- }
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- // If this value number isn't a copy from the LHS, it's a new number.
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
- NewVNInfo.push_back(VNI);
- RHSValNoAssignments[VN] = NewVNInfo.size()-1;
- continue;
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LI.
+ LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+ << ": " << Other.LI << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LI or Other.LI.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(&LI, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+ << Def << ": " << LI << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
}
-
- ComputeUltimateVN(VNI, NewVNInfo,
- RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
- RHSValNoAssignments, LHSValNoAssignments);
}
+}
- // Armed with the mappings of LHS/RHS values to ultimate values, walk the
- // interval lists to see if these intervals are coalescable.
- LiveInterval::const_iterator I = LHS.begin();
- LiveInterval::const_iterator IE = LHS.end();
- LiveInterval::const_iterator J = RHS.begin();
- LiveInterval::const_iterator JE = RHS.end();
-
- // Collect interval end points that will no longer be kills.
- SmallVector<MachineInstr*, 8> LHSOldKills;
- SmallVector<MachineInstr*, 8> RHSOldKills;
-
- // Skip ahead until the first place of potential sharing.
- if (I != IE && J != JE) {
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
}
}
+}
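+// The registers collected in ShrinkRegs are trimmed by the caller;
+// joinVirtRegs() runs LIS->shrinkToUses() on each of them once both sides
+// have erased their instructions.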
- while (I != IE && J != JE) {
- // Determine if these two live ranges overlap.
- // If so, check value # info to determine if they are really different.
- if (I->end > J->start && J->end > I->start) {
- // If the live range overlap will map to the same value number in the
- // result liverange, we can still coalesce them. If not, we can't.
- if (LHSValNoAssignments[I->valno->id] !=
- RHSValNoAssignments[J->valno->id])
- return false;
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+ JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
- // Extended live ranges should no longer be killed.
- if (!I->end.isBlock() && I->end < J->end)
- if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
- LHSOldKills.push_back(MI);
- if (!J->end.isBlock() && J->end < I->end)
- if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
- RHSOldKills.push_back(MI);
- }
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
- if (I->end < J->end)
- ++I;
- else
- ++J;
- }
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
- // Clear kill flags where live ranges are extended.
- while (!LHSOldKills.empty())
- LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
- while (!RHSOldKills.empty())
- RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);
-
- if (LHSValNoAssignments.empty())
- LHSValNoAssignments.push_back(-1);
- if (RHSValNoAssignments.empty())
- RHSValNoAssignments.push_back(-1);
-
- // Now erase all the redundant copies.
- for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
- MachineInstr *MI = DeadCopies[i];
- if (!ErasedInstrs.insert(MI))
- continue;
- DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
- << '\t' << *MI);
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- }
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
- SmallVector<unsigned, 8> SourceRegisters;
- for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
- E = DupCopies.end(); I != E; ++I) {
- MachineInstr *MI = *I;
- if (!ErasedInstrs.insert(MI))
- continue;
+ // All clear, the live ranges can be merged.
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints);
+ RHSVals.pruneValues(LHSVals, EndPoints);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<unsigned, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
+ MRI);
- // If MI is a copy, then we have pretended that the assignment to B in
- // A = X
- // B = X
- // was actually a copy from A. Now that we decided to coalesce A and B,
- // transform the code into
- // A = X
- // In the case of the implicit_def, we just have to remove it.
- if (!MI->isImplicitDef()) {
- unsigned Src = MI->getOperand(1).getReg();
- SourceRegisters.push_back(Src);
- }
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- }
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg);
+ MRI->clearKillFlags(RHS.reg);
- // If B = X was the last use of X in a liverange, we have to shrink it now
- // that B = X is gone.
- for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
- E = SourceRegisters.end(); I != E; ++I) {
- LIS->shrinkToUses(&LIS->getInterval(*I));
- }
+ if (EndPoints.empty())
+ return true;
- // If we get here, we know that we can coalesce the live ranges. Ask the
- // intervals to coalesce themselves now.
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- MRI);
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices(&LHS, EndPoints);
return true;
}
+/// joinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
namespace {
- // DepthMBBCompare - Comparison predicate that sort first based on the loop
- // depth of the basic block (the unsigned), and then on the MBB number.
- struct DepthMBBCompare {
- typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
- bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- // Deeper loops first
- if (LHS.first != RHS.first)
- return LHS.first > RHS.first;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
- unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.second->getNumber() < RHS.second->getNumber();
- }
- };
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+// C-style comparator that sorts first on the loop depth of the basic block,
+// then on split-edge status and CFG connectivity, and finally on the MBB
+// number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
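+// Note that the comparator never returns 0: MBB numbers are unique within a
+// function, so array_pod_sort() sees a strict ordering and the visit order
+// is deterministic.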
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
// Try joining WorkList copies starting from index From.
// Null out any successful joins.
-bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
- assert(From <= WorkList.size() && "Out of range");
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
- if (!WorkList[i])
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(WorkList[i])) {
- WorkList[i] = 0;
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
continue;
}
bool Again = false;
- bool Success = joinCopy(WorkList[i], Again);
+ bool Success = joinCopy(CurrList[i], Again);
Progress |= Success;
if (Success || !Again)
- WorkList[i] = 0;
+ CurrList[i] = 0;
}
return Progress;
}
@@ -1496,52 +2051,74 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet; it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII)
- if (MII->isCopyLike())
- WorkList.push_back(MII);
-
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
// joinable on the first attempt.
- if (copyCoalesceWorkList(PrevSize))
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
(MachineInstr*)0), WorkList.end());
}
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
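+// Local copies that still fail to join are appended to the global WorkList
+// above, so they are retried during the final fixed-point iteration in
+// joinAllIntervals().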
+
void RegisterCoalescer::joinAllIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
- assert(WorkList.empty() && "Old data still around.");
-
- if (Loops->empty()) {
- // If there are no loops in the function, join intervals in function order.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();
- I != E; ++I)
- copyCoalesceInMBB(I);
- } else {
- // Otherwise, join intervals in inner loops before other intervals.
- // Unfortunately we can't just iterate over loop hierarchy here because
- // there may be more MBB's than BB's. Collect MBB's for sorting.
-
- // Join intervals in the function prolog first. We want to join physical
- // registers with virtual registers before the intervals got too long.
- std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
- MachineBasicBlock *MBB = I;
- MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
}
-
- // Sort by loop depth.
- std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
- // Finally, join intervals in loop nest order.
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- copyCoalesceInMBB(MBBs[i].second);
+ copyCoalesceInMBB(MBBs[i].MBB);
}
+ coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- while (copyCoalesceWorkList())
+ while (copyCoalesceWorkList(WorkList))
/* empty */ ;
}
@@ -1563,6 +2140,17 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: " << MF->getName() << '\n');
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 6cdfe7cd..62e95aa 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -12,25 +12,22 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-/// Increase register pressure for each set impacted by this register class.
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
std::vector<unsigned> &MaxSetPressure,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- for (const int *PSet = TRI->getRegClassPressureSets(RC);
- *PSet != -1; ++PSet) {
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
CurrSetPressure[*PSet] += Weight;
if (&CurrSetPressure != &MaxSetPressure
&& CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
@@ -39,32 +36,57 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
-/// Decrease register pressure for each set impacted by this register class.
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- for (const int *PSet = TRI->getRegClassPressureSets(RC);
- *PSet != -1; ++PSet) {
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
CurrSetPressure[*PSet] -= Weight;
}
}
/// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
}
/// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- decreaseSetPressure(MaxSetPressure, RC, TRI);
+void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ }
}
-#ifndef NDEBUG
-void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -73,42 +95,47 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
dbgs() << '\n';
- for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
- if (MaxSetPressure[i] != 0)
- dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
- << '\n';
- }
-}
-#endif
-
-/// Increase the current pressure as impacted by these physical registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- TRI->getMinimalPhysRegClass(Regs[I]), TRI);
}
-/// Simply decrease the current pressure as impacted by these physical
-/// registers.
-void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
- TRI);
+void RegPressureTracker::dump() const {
+ dbgs() << "Curr Pressure: ";
+ dumpSetPressure(CurrSetPressure, TRI);
+ P.dump(TRI);
}
+#endif
-/// Increase the current pressure as impacted by these virtual registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- MRI->getRegClass(Regs[I]), TRI);
+/// Increase the current pressure as impacted by these registers and bump
+/// the high water mark if needed.
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
}
-/// Simply decrease the current pressure as impacted by these virtual registers.
-void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+/// Simply decrease the current pressure as impacted by these registers.
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ decreaseSetPressure(CurrSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
}
/// Clear the result so it can be used for another round of pressure tracking.
@@ -160,6 +187,12 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
LiveInRegs.clear();
}
+const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return &LIS->getInterval(Reg);
+ return LIS->getCachedRegUnit(Reg);
+}
+
/// Setup the RegPressureTracker.
///
/// TODO: Add support for pressure without LiveIntervals.
@@ -181,9 +214,6 @@ void RegPressureTracker::init(const MachineFunction *mf,
}
CurrPos = pos;
- while (CurrPos != MBB->end() && CurrPos->isDebugValue())
- ++CurrPos;
-
CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
if (RequireIntervals)
@@ -192,10 +222,10 @@ void RegPressureTracker::init(const MachineFunction *mf,
static_cast<RegionPressure&>(P).reset();
P.MaxSetPressure = CurrSetPressure;
- LivePhysRegs.clear();
- LivePhysRegs.setUniverse(TRI->getNumRegs());
- LiveVirtRegs.clear();
- LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+ LiveRegs.PhysRegs.clear();
+ LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
+ LiveRegs.VirtRegs.clear();
+ LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
}
/// Does this pressure result have a valid top position and live ins.
@@ -214,19 +244,28 @@ bool RegPressureTracker::isBottomClosed() const {
MachineBasicBlock::const_iterator());
}
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos = CurrPos;
+ while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+ ++IdxPos;
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(IdxPos).getRegSlot();
+}
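+// Debug values carry no slot index, so the current slot is taken from the
+// next real instruction, or from the block end when only DBG_VALUEs remain.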
+
/// Set the boundary for the top of the region and summarize live ins.
void RegPressureTracker::closeTop() {
if (RequireIntervals)
- static_cast<IntervalPressure&>(P).TopIdx =
- LIS->getInstructionIndex(CurrPos).getRegSlot();
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
else
static_cast<RegionPressure&>(P).TopPos = CurrPos;
assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
- P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
- P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
for (SparseSet<unsigned>::const_iterator I =
- LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
P.LiveInRegs.push_back(*I);
std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
@@ -236,19 +275,15 @@ void RegPressureTracker::closeTop() {
/// Set the boundary for the bottom of the region and summarize live outs.
void RegPressureTracker::closeBottom() {
if (RequireIntervals)
- if (CurrPos == MBB->end())
- static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB);
- else
- static_cast<IntervalPressure&>(P).BottomIdx =
- LIS->getInstructionIndex(CurrPos).getRegSlot();
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
else
static_cast<RegionPressure&>(P).BottomPos = CurrPos;
assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
- P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
- P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
for (SparseSet<unsigned>::const_iterator I =
- LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
P.LiveOutRegs.push_back(*I);
std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
@@ -258,7 +293,7 @@ void RegPressureTracker::closeBottom() {
/// Finalize the region boundaries and record live ins and live outs.
void RegPressureTracker::closeRegion() {
if (!isTopClosed() && !isBottomClosed()) {
- assert(LivePhysRegs.empty() && LiveVirtRegs.empty() &&
+ assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
"no region boundary");
return;
}
@@ -269,153 +304,98 @@ void RegPressureTracker::closeRegion() {
// If both top and bottom are closed, do nothing.
}
-/// Return true if Reg aliases a register in Regs SparseSet.
-static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs");
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (Regs.count(*AI))
- return true;
- return false;
-}
-
-/// Return true if Reg aliases a register in unsorted Regs SmallVector.
-/// This is only valid for physical registers.
-static SmallVectorImpl<unsigned>::iterator
-findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- SmallVectorImpl<unsigned>::iterator I =
- std::find(Regs.begin(), Regs.end(), *AI);
- if (I != Regs.end())
- return I;
- }
- return Regs.end();
-}
-
-/// Return true if Reg can be inserted into Regs SmallVector. For virtual
-/// register, do a linear search. For physical registers check for aliases.
-static SmallVectorImpl<unsigned>::iterator
-findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- if(isVReg)
- return std::find(Regs.begin(), Regs.end(), Reg);
- return findRegAlias(Reg, Regs, TRI);
+/// \brief Convenient wrapper for checking membership in RegisterOperands.
+static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
+ return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
}
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
-template<bool isVReg>
-struct RegisterOperands {
+class RegisterOperands {
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+public:
SmallVector<unsigned, 8> Uses;
SmallVector<unsigned, 8> Defs;
SmallVector<unsigned, 8> DeadDefs;
+ RegisterOperands(const TargetRegisterInfo *tri,
+ const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+
/// Push this operand's register onto the correct vector.
- void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) {
- if (MO.readsReg()) {
- if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end())
- Uses.push_back(MO.getReg());
- }
+ void collect(const MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ if (MO.readsReg())
+ pushRegUnits(MO.getReg(), Uses);
if (MO.isDef()) {
- if (MO.isDead()) {
- if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
- DeadDefs.push_back(MO.getReg());
- }
- else {
- if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
- Defs.push_back(MO.getReg());
+ if (MO.isDead())
+ pushRegUnits(MO.getReg(), DeadDefs);
+ else
+ pushRegUnits(MO.getReg(), Defs);
+ }
+ }
+
+protected:
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (containsReg(Regs, Reg))
+ return;
+ Regs.push_back(Reg);
+ }
+ else if (MRI->isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (containsReg(Regs, *Units))
+ continue;
+ Regs.push_back(*Units);
}
}
}
};
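+// Tracking physical registers by register unit lets aliasing registers share
+// pressure contributions automatically, replacing the explicit alias walks
+// that the old PhysRegOperands required.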
-typedef RegisterOperands<false> PhysRegOperands;
-typedef RegisterOperands<true> VirtRegOperands;
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
- PhysRegOperands &PhysRegOpers,
- VirtRegOperands &VirtRegOpers,
- const TargetRegisterInfo *TRI,
- const RegisterClassInfo *RCI) {
- for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
- const MachineOperand &MO = *OperI;
- if (!MO.isReg() || !MO.getReg())
- continue;
+ RegisterOperands &RegOpers) {
+  for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ RegOpers.collect(*OperI);
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- VirtRegOpers.collect(MO, TRI);
- else if (RCI->isAllocatable(MO.getReg()))
- PhysRegOpers.collect(MO, TRI);
- }
// Remove redundant physreg dead defs.
- for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) {
- unsigned Reg = PhysRegOpers.DeadDefs[i-1];
- if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end())
- PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]);
+ for (unsigned i = RegOpers.DeadDefs.size(); i > 0; --i) {
+ unsigned Reg = RegOpers.DeadDefs[i-1];
+ if (containsReg(RegOpers.Defs, Reg))
+ RegOpers.DeadDefs.erase(&RegOpers.DeadDefs[i-1]);
}
}
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- if (TargetRegisterInfo::isVirtualRegister(Regs[i])) {
- if (LiveVirtRegs.insert(Regs[i]).second)
- increaseVirtRegPressure(Regs[i]);
- }
- else {
- if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) {
- LivePhysRegs.insert(Regs[i]);
- increasePhysRegPressure(Regs[i]);
- }
- }
+ if (LiveRegs.insert(Regs[i]))
+ increaseRegPressure(Regs[i]);
}
}
-/// Add PhysReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) {
- assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
- if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end())
+/// Add Reg to the live in set and increase max pressure.
+void RegPressureTracker::discoverLiveIn(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveInRegs, Reg))
return;
// At live in discovery, unconditionally increase the high water mark.
P.LiveInRegs.push_back(Reg);
- P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+ P.increase(Reg, TRI, MRI);
}
-/// Add PhysReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) {
- assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
- if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end())
+/// Add Reg to the live out set and increase max pressure.
+void RegPressureTracker::discoverLiveOut(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveOutRegs, Reg))
return;
// At live out discovery, unconditionally increase the high water mark.
P.LiveOutRegs.push_back(Reg);
- P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
-}
-
-/// Add VirtReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) {
- assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
- if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
- P.LiveInRegs.end())
- return;
-
- // At live in discovery, unconditionally increase the high water mark.
- P.LiveInRegs.push_back(Reg);
- P.increase(MRI->getRegClass(Reg), TRI);
-}
-
-/// Add VirtReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) {
- assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
- if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) !=
- P.LiveOutRegs.end())
- return;
-
- // At live out discovery, unconditionally increase the high water mark.
- P.LiveOutRegs.push_back(Reg);
- P.increase(MRI->getRegClass(Reg), TRI);
+ P.increase(Reg, TRI, MRI);
}
/// Recede across the previous instruction.
@@ -449,52 +429,35 @@ bool RegPressureTracker::recede() {
if (RequireIntervals && isTopClosed())
static_cast<IntervalPressure&>(P).openTop(SlotIdx);
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Defs[i];
- if (LivePhysRegs.erase(Reg))
- decreasePhysRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
else
- discoverPhysLiveOut(Reg);
- }
- for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Defs[i];
- if (LiveVirtRegs.erase(Reg))
- decreaseVirtRegPressure(Reg);
- else
- discoverVirtLiveOut(Reg);
+ discoverLiveOut(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
- increasePhysRegPressure(Reg);
- LivePhysRegs.insert(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- if (!LiveVirtRegs.count(Reg)) {
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg)) {
// Adjust liveouts if LiveIntervals are available.
if (RequireIntervals) {
- const LiveInterval *LI = &LIS->getInterval(Reg);
- if (!LI->killedAt(SlotIdx))
- discoverVirtLiveOut(Reg);
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && !LI->killedAt(SlotIdx))
+ discoverLiveOut(Reg);
}
- increaseVirtRegPressure(Reg);
- LiveVirtRegs.insert(Reg);
+ increaseRegPressure(Reg);
+ LiveRegs.insert(Reg);
}
}
return true;
@@ -512,7 +475,7 @@ bool RegPressureTracker::advance() {
SlotIndex SlotIdx;
if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ SlotIdx = getCurrSlot();
// Open the bottom of the region using slot indexes.
if (isBottomClosed()) {
@@ -522,57 +485,43 @@ bool RegPressureTracker::advance() {
static_cast<RegionPressure&>(P).openBottom(CurrPos);
}
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
-
- // Kill liveness at last uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI))
- discoverPhysLiveIn(Reg);
- else {
- // Allocatable physregs are always single-use before regalloc.
- decreasePhysRegPressure(Reg);
- LivePhysRegs.erase(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ // Discover live-ins.
+ bool isLive = LiveRegs.contains(Reg);
+ if (!isLive)
+ discoverLiveIn(Reg);
+ // Kill liveness at last uses.
+ bool lastUse = false;
if (RequireIntervals) {
- const LiveInterval *LI = &LIS->getInterval(Reg);
- if (LI->killedAt(SlotIdx)) {
- if (LiveVirtRegs.erase(Reg))
- decreaseVirtRegPressure(Reg);
- else
- discoverVirtLiveIn(Reg);
- }
+ const LiveInterval *LI = getInterval(Reg);
+ lastUse = LI && LI->killedAt(SlotIdx);
+ }
+ else {
+ // Allocatable physregs are always single-use before register rewriting.
+ lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
}
- else if (!LiveVirtRegs.count(Reg)) {
- discoverVirtLiveIn(Reg);
- increaseVirtRegPressure(Reg);
+ if (lastUse && isLive) {
+ LiveRegs.erase(Reg);
+ decreaseRegPressure(Reg);
}
+ else if (!lastUse && !isLive)
+ increaseRegPressure(Reg);
}
// Generate liveness for defs.
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Defs[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
- increasePhysRegPressure(Reg);
- LivePhysRegs.insert(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Defs[i];
- if (LiveVirtRegs.insert(Reg).second)
- increaseVirtRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
}
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Find the next instruction.
do
@@ -663,32 +612,28 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
// Account for register pressure similar to RegPressureTracker::recede().
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
// Boost max pressure for all dead defs together.
  // Since CurrSetPressure and MaxSetPressure are bumped together, the
  // paired increase/decrease leaves current pressure unchanged and only
  // raises the high water mark.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Kill liveness at live defs.
- decreasePhysRegPressure(PhysRegOpers.Defs);
- decreaseVirtRegPressure(VirtRegOpers.Defs);
-
- // Generate liveness for uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI))
- increasePhysRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
}
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- if (!LiveVirtRegs.count(Reg))
- increaseVirtRegPressure(Reg);
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg))
+ increaseRegPressure(Reg);
}
}
@@ -735,6 +680,8 @@ static bool findUseBetween(unsigned Reg,
UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
UI != UE; UI.skipInstruction()) {
const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
return true;
@@ -749,38 +696,42 @@ static bool findUseBetween(unsigned Reg,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
// Account for register pressure similar to RegPressureTracker::recede().
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
- decreasePhysRegPressure(PhysRegOpers.Uses);
- if (RequireIntervals) {
- SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- const LiveInterval *LI = &LIS->getInterval(Reg);
- // FIXME: allow the caller to pass in the list of vreg uses that remain to
- // be bottom-scheduled to avoid searching uses at each query.
- SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
- if (LI->killedAt(SlotIdx)
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (RequireIntervals) {
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && LI->killedAt(SlotIdx)
&& !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
- decreaseVirtRegPressure(Reg);
+ decreaseRegPressure(Reg);
}
}
+ else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Allocatable physregs are always single-use before register rewriting.
+ decreaseRegPressure(Reg);
+ }
}
// Generate liveness for defs.
- increasePhysRegPressure(PhysRegOpers.Defs);
- increaseVirtRegPressure(VirtRegOpers.Defs);
+ increaseRegPressure(RegOpers.Defs);
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
}
/// Consider the pressure increase caused by traversing this instruction
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index d673794..88f67da 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,21 +16,17 @@
#define DEBUG_TYPE "reg-scavenging"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
/// setUsed - Set the register and its sub-registers as being used.
@@ -43,7 +39,7 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (isUsed(*AI))
+ if (isUsed(*AI, *AI == Reg))
return true;
return false;
}
@@ -92,9 +88,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
KillRegs.resize(NumPhysRegs);
DefRegs.resize(NumPhysRegs);
- // Create reserved registers bitvector.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
@@ -225,9 +218,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = RegsAvailable;
used.flip();
if (includeReserved)
- used |= ReservedRegs;
+ used |= MRI->getReservedRegs();
else
- used.reset(ReservedRegs);
+ used.reset(MRI->getReservedRegs());
}
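
getRegsUsed derives the used set by flipping RegsAvailable and then folding the
reserved registers in or out. The same flip/merge pattern in a self-contained
sketch (std::bitset stands in for BitVector; the register count is arbitrary):

#include <bitset>
#include <cstddef>

const std::size_t NumRegs = 32;

// used = ~available, then either include or exclude the reserved set.
std::bitset<NumRegs> regsUsed(const std::bitset<NumRegs> &Available,
                              const std::bitset<NumRegs> &Reserved,
                              bool IncludeReserved) {
  std::bitset<NumRegs> Used = ~Available; // unavailable means used
  if (IncludeReserved)
    Used |= Reserved;                     // reserved registers count as used
  else
    Used &= ~Reserved;                    // drop reserved bits from the set
  return Used;
}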
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index af8cd8f..e639c55 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -16,12 +16,12 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already present. It also adds the current node as a successor of the
/// specified node.
-bool SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
if (I->overlaps(D)) {
// Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
if (I->getLatency() < D.getLatency()) {
@@ -97,12 +101,22 @@ bool SUnit::addPred(const SDep &D) {
++N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
- ++NumPredsLeft;
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
- ++N->NumSuccsLeft;
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
}
Preds.push_back(D);
N->Succs.push_back(P);
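
Weak edges get their own counters so they never gate readiness: a node becomes
schedulable once its required predecessor count reaches zero, while
WeakPredsLeft only feeds ordering heuristics. A minimal sketch of that split,
using toy types rather than SUnit:

// Toy scheduling node tracking strong and weak predecessors separately.
struct ToyNode {
  unsigned NumPredsLeft = 0;   // required edges remaining
  unsigned WeakPredsLeft = 0;  // weak edges remaining; advisory only

  void addPred(bool IsWeak) {
    if (IsWeak)
      ++WeakPredsLeft;
    else
      ++NumPredsLeft;
  }

  // Only required predecessors block scheduling; weak ones merely
  // bias tie-breaking once the node is ready.
  bool isReady() const { return NumPredsLeft == 0; }
};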
@@ -144,12 +158,20 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
}
if (P.getLatency() != 0) {
this->setDepthDirty();
@@ -279,7 +301,7 @@ void SUnit::ComputeHeight() {
} while (!WorkList.empty());
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
@@ -292,6 +314,10 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
@@ -429,6 +455,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
Node2Index.resize(DAGSize);
// Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
int NodeNum = SU->NodeNum;
@@ -448,11 +476,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
- Allocate(SU->NodeNum, --Id);
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
- if (!--Node2Index[SU->NodeNum])
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
WorkList.push_back(SU);
@@ -513,7 +542,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
WorkList.pop_back();
Visited.set(SU->NodeNum);
for (int I = SU->Succs.size()-1; I >= 0; --I) {
- int s = SU->Succs[I].getSUnit()->NodeNum;
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
if (Node2Index[s] == UpperBound) {
HasLoop = true;
return;
@@ -554,15 +586,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
-/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
-/// create a cycle.
-bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
- if (IsReachable(TargetSU, SU))
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
if (I->isAssignedRegDep() &&
- IsReachable(TargetSU, I->getSUnit()))
+ IsReachable(SU, I->getSUnit()))
return true;
return false;
}
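
The contract is to query WillCreateCycle(TargetSU, SU) before calling
AddPred(TargetSU, SU): the new edge is unsafe exactly when SU is already
reachable from TargetSU along successor edges. A standalone DFS reachability
check over an adjacency list, with integer indices standing in for SUnits:

#include <vector>

// Returns true if To is reachable from From via successor edges.
static bool reaches(const std::vector<std::vector<int>> &Succs,
                    int From, int To) {
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<int> Stack(1, From);
  while (!Stack.empty()) {
    int N = Stack.back();
    Stack.pop_back();
    if (N == To)
      return true;
    if (Visited[N])
      continue;
    Visited[N] = true;
    for (int S : Succs[N])
      Stack.push_back(S);
  }
  return false;
}

// Adding the successor edge Src -> Target closes a cycle precisely
// when Src is already reachable from Target.
static bool willCreateCycle(const std::vector<std::vector<int>> &Succs,
                            int Target, int Src) {
  return reaches(Succs, Target, Src);
}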
@@ -592,6 +625,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
}
ScheduleDAGTopologicalSort::
-ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2d8f235..662fc0e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,8 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sched-instrs"
-#include "llvm/Operator.h"
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -22,17 +25,17 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
@@ -44,14 +47,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineDominatorTree &mdt,
bool IsPostRAFlag,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
- IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
- LoopRegs(MDT), FirstDbgValue(0) {
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -63,15 +67,16 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
// regular getUnderlyingObjectFromInt.
if (U->getOpcode() == Instruction::PtrToInt)
return U->getOperand(0);
- // If we find an add of a constant or a multiplied value, it's
+ // If we find an add of a constant, a multiplied value, or a phi, it's
// likely that the other operand will lead us to the base
// object. We don't have to worry about the case where the
// object address is somehow being computed by the multiply,
// because our callers only care when the result is an
- // identifibale object.
+ // identifiable object.
if (U->getOpcode() != Instruction::Add ||
(!isa<ConstantInt>(U->getOperand(1)) &&
- Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
return V;
V = U->getOperand(0);
} else {
@@ -81,64 +86,81 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
-static const Value *getUnderlyingObject(const Value *V) {
- // First just call Value::getUnderlyingObject to let it do what it does.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects) {
+ SmallPtrSet<const Value*, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
do {
- V = GetUnderlyingObject(V);
- // If it found an inttoptr, use special code to continue climing.
- if (Operator::getOpcode(V) != Instruction::IntToPtr)
- break;
- const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
- // If that succeeded in finding a pointer, continue the search.
- if (!O->getType()->isPointerTy())
- break;
- V = O;
- } while (1);
- return V;
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V))
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
}
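
The rewrite replaces the old single-object climb with a worklist that fans out
across every underlying object, deduplicating through a visited set. The shape
of that traversal in a self-contained sketch, with a plain adjacency table
standing in for GetUnderlyingObjects and the inttoptr special case:

#include <set>
#include <vector>

struct ObjectGraph {
  // Value -> its direct underlying objects; empty means a leaf object.
  std::vector<std::vector<int>> Under;

  // Collect every leaf object reachable from Root, visiting each value once.
  void collect(int Root, std::vector<int> &Objects) const {
    std::set<int> Visited;
    std::vector<int> Working(1, Root);
    while (!Working.empty()) {
      int V = Working.back();
      Working.pop_back();
      for (int Next : Under[V]) {
        if (!Visited.insert(Next).second)
          continue;                  // already seen on another path
        if (!Under[Next].empty())
          Working.push_back(Next);   // interior value: keep climbing
        else
          Objects.push_back(Next);   // leaf: record an underlying object
      }
    }
  }
};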
-/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object. Otherwise return null.
-static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
- const MachineFrameInfo *MFI,
- bool &MayAlias) {
- MayAlias = true;
+/// object, collect the Values for those objects.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
if (!MI->hasOneMemOperand() ||
!(*MI->memoperands_begin())->getValue() ||
(*MI->memoperands_begin())->isVolatile())
- return 0;
+ return;
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
- return 0;
-
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (PSV->isAliased(MFI))
- return 0;
-
- MayAlias = PSV->mayAlias(MFI);
- return V;
- }
+ return;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ bool MayAlias = true;
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
- if (isIdentifiedObject(V))
- return V;
+ if (PSV->isAliased(MFI)) {
+ Objects.clear();
+ return;
+ }
+
+ MayAlias = PSV->mayAlias(MFI);
+ } else if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
- return 0;
+ Objects.push_back(std::make_pair(V, MayAlias));
+ }
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
BB = bb;
- LoopRegs.Deps.clear();
- if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch())
- LoopRegs.VisitLoop(ML);
}
void ScheduleDAGInstrs::finishBlock() {
@@ -174,9 +196,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
EndIndex = endcount;
MISUnitMap.clear();
- // Check to see if the scheduler cares about latencies.
- UnitLatencies = forceUnitLatencies();
-
ScheduleDAG::clearDAG();
}
@@ -212,7 +231,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
- addVRegUseDeps(&ExitSU, i);
+ if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
}
}
} else {
@@ -238,8 +258,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
- unsigned DataLatency = SU->Latency;
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
@@ -250,43 +268,27 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
SUnit *UseSU = UseList[i].SU;
if (UseSU == SU)
continue;
- MachineInstr *UseMI = UseSU->getInstr();
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
int UseOp = UseList[i].OpIdx;
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies &&
- UseSU != &ExitSU) {
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
- assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
- if (RegUseIndex >= 0 &&
- (UseMI->mayLoad() || UseMI->mayStore()) &&
- (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
- UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- SDep dep(SU, SDep::Data, LDataLatency, *Alias);
- if (!UnitLatencies) {
- unsigned Latency =
- TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
- (UseOp < 0 ? 0 : UseMI), UseOp);
- dep.setLatency(Latency);
- unsigned MinLatency =
- TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
- (UseOp < 0 ? 0 : UseMI), UseOp,
- /*FindMin=*/true);
- dep.setMinLatency(MinLatency);
-
- ST.adjustSchedDependency(SU, UseSU, dep);
+ MachineInstr *RegUse = 0;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ Dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
}
- UseSU->addPred(dep);
+ Dep.setLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/false));
+
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
}
}
}
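
All latency math now flows through the subtarget's machine model, which yields
both an expected latency and a separate minimum for each dependence edge. A
self-contained sketch of that lookup; the opcode table, defaults, and names are
invented for illustration:

#include <map>

struct ToySchedModel {
  // Per-opcode def-to-use latency; unknown opcodes default to 1 cycle.
  std::map<int, unsigned> OpLatency;

  unsigned computeOperandLatency(int DefOpcode, bool FindMin) const {
    if (FindMin)
      return 0; // minimum latency: only issue order is enforced
    std::map<int, unsigned>::const_iterator I = OpLatency.find(DefOpcode);
    return I == OpLatency.end() ? 1 : I->second;
  }
};

struct ToyDep {
  unsigned Latency = 0;    // expected latency used by the scheduler
  unsigned MinLatency = 0; // hard lower bound on the edge
};

// Build a data edge the way the hunk above does: min first, then expected.
ToyDep makeDataDep(const ToySchedModel &SM, int DefOpcode) {
  ToyDep D;
  D.MinLatency = SM.computeOperandLatency(DefOpcode, /*FindMin=*/true);
  D.Latency = SM.computeOperandLatency(DefOpcode, /*FindMin=*/false);
  return D;
}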
@@ -318,11 +320,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias))) {
if (Kind == SDep::Anti)
- DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+ DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
else {
- unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
- DefSU->getInstr());
- DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
}
}
}
@@ -341,53 +346,6 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// retrieve the existing SUnits list for this register's defs.
std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
- // If a def is going to wrap back around to the top of the loop,
- // backschedule it.
- if (!UnitLatencies && DefList.empty()) {
- LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
- if (I != LoopRegs.Deps.end()) {
- const MachineOperand *UseMO = I->second.first;
- unsigned Count = I->second.second;
- const MachineInstr *UseMI = UseMO->getParent();
- unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- const TargetSubtargetInfo &ST =
- TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
- // TODO: If we knew the total depth of the region here, we could
- // handle the case where the whole loop is inside the region but
- // is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseMCID.getNumOperands()) {
- // Currently, we only support scheduling regions consisting of
- // single basic blocks. Check to see if the instruction is in
- // the same region by checking to see if it has the same parent.
- if (UseMI->getParent() != MI->getParent()) {
- unsigned Latency = SU->Latency;
- if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
- Latency += SpecialAddressLatency;
- // This is a wild guess as to the portion of the latency which
- // will be overlapped by work done outside the current
- // scheduling region.
- Latency -= std::min(Latency, Count);
- // Add the artificial edge.
- ExitSU.addPred(SDep(SU, SDep::Order, Latency,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- } else if (SpecialAddressLatency > 0 &&
- UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
- // The entire loop body is within the current scheduling region
- // and the latency of this operation is assumed to be greater
- // than the latency of the loop.
- // TODO: Recursively mark data-edge predecessors as
- // isScheduleHigh too.
- SU->isScheduleHigh = true;
- }
- }
- LoopRegs.Deps.erase(I);
- }
- }
-
// clear this register's use list
if (Uses.contains(MO.getReg()))
Uses[MO.getReg()].clear();
@@ -438,9 +396,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
else {
SUnit *DefSU = DefI->SU;
if (DefSU != SU && DefSU != &ExitSU) {
- unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
- DefSU->getInstr());
- DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+ SDep Dep(SU, SDep::Output, Reg);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
}
DefI->SU = SU;
}
@@ -470,24 +431,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
if (DefSU) {
// The reaching Def lives within this scheduling region.
// Create a data dependence.
- //
- // TODO: Handle "special" address latencies cleanly.
- SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
- if (!UnitLatencies) {
- // Adjust the dependence latency using operand def/use information, then
- // allow the target to perform its own adjustments.
- int DefOp = Def->findRegisterDefOperandIdx(Reg);
- unsigned Latency =
- TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx);
- dep.setLatency(Latency);
- unsigned MinLatency =
- TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx,
- /*FindMin=*/true);
- dep.setMinLatency(MinLatency);
-
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
- }
+ SDep dep(DefSU, SDep::Data, Reg);
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ int DefOp = Def->findRegisterDefOperandIdx(Reg);
+ dep.setLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+ dep.setMinLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
}
@@ -495,7 +449,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Add antidependence to the following def of the vreg it uses.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
- DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+ DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
@@ -520,23 +474,29 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
if ((*MI->memoperands_begin())->isVolatile() ||
MI->hasUnmodeledSideEffects())
return true;
-
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return true;
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- // Similarly to getUnderlyingObjectForInstr:
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (PSV->isAliased(MFI))
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+ IE = Objs.end(); I != IE; ++I) {
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
return true;
}
- // Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(V))
- return true;
return false;
}
@@ -635,8 +595,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
// and stop descending.
if (*Depth > 200 ||
MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
- SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
// Track current depth.
@@ -667,9 +626,9 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
if (SU == *I)
continue;
if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
- unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
- (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ SDep Dep(SU, SDep::MayAliasMem);
+ Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+ (*I)->addPred(Dep);
}
// Now go through all the chain successors and iterate from them.
// Keep track of visited nodes.
@@ -692,9 +651,11 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
// If this is a false dependency,
// do not add the edge, but remember the rejected node.
if (!EnableAASchedMI ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
- SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
- isNormalMemory));
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ SUb->addPred(Dep);
+ }
else {
// Duplicate entries should be ignored.
RejectList.insert(SUb);
@@ -732,10 +693,7 @@ void ScheduleDAGInstrs::initSUnits() {
SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
- if (UnitLatencies)
- SU->Latency = 1;
- else
- computeLatency(SU);
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
}
}
@@ -757,8 +715,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// so that they can be given more precise dependencies. We track
// separately the known memory locations that may alias and those
// that are known not to alias
- std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
- std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
@@ -782,17 +740,17 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
- MachineInstr *PrevMI = NULL;
+ MachineInstr *DbgMI = NULL;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
- if (MI && PrevMI) {
- DbgValues.push_back(std::make_pair(PrevMI, MI));
- PrevMI = NULL;
+ if (MI && DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, MI));
+ DbgMI = NULL;
}
if (MI->isDebugValue()) {
- PrevMI = MI;
+ DbgMI = MI;
continue;
}
if (RPTracker) {
@@ -807,6 +765,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
// Add register-based dependencies (data, anti, and output).
+ bool HasVRegDef = false;
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg()) continue;
@@ -817,12 +776,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addPhysRegDeps(SU, j);
else {
assert(!IsPostRA && "Virtual register encountered!");
- if (MO.isDef())
+ if (MO.isDef()) {
+ HasVRegDef = true;
addVRegDefDeps(SU, j);
+ }
else if (MO.readsReg()) // ignore undef operands
addVRegUseDeps(SU, j);
}
}
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1
+ && (HasVRegDef || MI->mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
// Add chain dependencies.
// Chain dependencies used to enforce memory order should have
@@ -837,18 +810,21 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ I->second->addPred(SDep(SU, SDep::Barrier));
}
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ SDep Dep(SU, SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ I->second[i]->addPred(Dep);
+ }
}
// Add SU to the barrier chain.
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
@@ -872,10 +848,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
- for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
@@ -887,95 +863,115 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
- bool MayAlias = true;
- if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ bool MayAlias = false;
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
+ const Value *V = K->first;
+ bool ThisMayAlias = K->second;
+ if (ThisMayAlias)
+ MayAlias = true;
+
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
- std::map<const Value *, SUnit *>::iterator I =
- ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
- ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
- 0, true);
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
I->second = SU;
} else {
- if (MayAlias)
+ if (ThisMayAlias)
AliasMemDefs[V] = SU;
else
NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
- std::map<const Value *, std::vector<SUnit *> >::iterator J =
- ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- std::map<const Value *, std::vector<SUnit *> >::iterator JE =
- ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ MapVector<const Value *, std::vector<SUnit *> >::iterator J =
+ ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
- if (MayAlias) {
- // Add dependencies from all the PendingLoads, i.e. loads
- // with no underlying object.
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- // Add dependence on alias chain, if needed.
- if (AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
- // But we also should check dependent instructions for the
- // SU in question.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- }
- // Add dependence on barrier chain, if needed.
- // There is no point to check aliasing on barrier event. Even if
- // SU and barrier _could_ be reordered, they should not. In addition,
- // we have lost all RejectMemNodes below barrier.
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- } else {
- // Treat all other stores conservatively.
- goto new_alias_chain;
}
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we should also check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ // Add dependence on barrier chain, if needed.
+ // There is no point in checking aliasing on a barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
if (!ExitSU.isPred(SU))
// Push stores up a bit to avoid them getting in between cmp
// and branches.
- ExitSU.addPred(SDep(SU, SDep::Order, 0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ ExitSU.addPred(SDep(SU, SDep::Artificial));
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
- if (const Value *V =
- getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I =
- ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
- ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
- if (MayAlias)
- AliasMemUses[V].push_back(SU);
- else
- NonAliasMemUses[V].push_back(SU);
- } else {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
+ } else {
+ MayAlias = false;
+ }
+
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+ const Value *V = J->first;
+ bool ThisMayAlias = J->second;
+
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ if (ThisMayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
@@ -983,12 +979,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (MayAlias && AliasChain)
addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
}
}
- if (PrevMI)
- FirstDbgValue = PrevMI;
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
Defs.clear();
Uses.clear();
@@ -996,23 +992,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
PendingLoads.clear();
}
-void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node. We only provide a default for missing
- // itineraries. Empty itineraries still have latency properties.
- if (!InstrItins) {
- SU->Latency = 1;
-
- // Simplistic target-independent heuristic: assume that loads take
- // extra time.
- if (SU->getInstr()->mayLoad())
- SU->Latency += 2;
- } else {
- SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
- }
-}
-
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
#endif
}
@@ -1034,3 +1015,219 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {}
+
+ /// SubtreeID is initialized to zero, set to itself to flag the root of a
+ /// subtree, set to the parent to indicate an interior node,
+ /// then set to a representative subtree ID during finalization.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSData[SU->NodeNum].SubtreeID;
+ }
+
+ /// Initialize this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Mark this node as either the root of a subtree or an interior
+ /// node. Increment the parent node's instruction count.
+ void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) {
+ R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum;
+
+ // Join the child to its parent if they are connected via data dependence
+ // and do not exceed the limit.
+ if (!Parent || PredDep->getKind() != SDep::Data)
+ return;
+
+ unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount;
+ if (PredCnt > R.SubtreeLimit)
+ return;
+
+ R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum;
+
+ // Add the recently finished predecessor's bottom-up descendant count.
+ R.DFSData[Parent->NodeNum].InstrCount += PredCnt;
+ SubtreeClasses.join(Parent->NodeNum, SU->NodeNum);
+ }
+
+ /// Determine whether the DFS cross edge should be considered a subtree edge
+ /// or a connection between subtrees.
+ void visitCross(const SDep &PredDep, const SUnit *Succ) {
+ if (PredDep.getKind() == SDep::Data) {
+ // If this is a cross edge to a root, join the subtrees. This happens when
+ // the root was first reached by a non-data dependence.
+ unsigned NodeNum = PredDep.getSUnit()->NodeNum;
+ unsigned PredCnt = R.DFSData[NodeNum].InstrCount;
+ if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) {
+ R.DFSData[NodeNum].SubtreeID = Succ->NodeNum;
+ R.DFSData[Succ->NodeNum].InstrCount += PredCnt;
+ SubtreeClasses.join(Succ->NodeNum, NodeNum);
+ return;
+ }
+ }
+ ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ }
+
+ /// Set each node's subtree ID to the representative ID and record connections
+ /// between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) {
+ R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx];
+ DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSData[Idx].SubtreeID << '\n');
+ }
+ for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+ I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+ I != E; ++I) {
+ unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = I->first->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ }
+};
+} // namespace llvm
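
SubtreeClasses is an IntEqClasses-style union-find: join merges classes while
the DFS runs, and compress renumbers them densely so finalize() can index
SubtreeConnections by class. A minimal standalone equivalent, assuming (as the
code above does) that no joins happen after compression:

#include <numeric>
#include <vector>

struct EqClasses {
  std::vector<unsigned> Parent;

  explicit EqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u); // each node its own class
  }

  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]]; // path halving
    return X;
  }

  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }

  // Renumber classes to 0..K-1. Afterwards Parent holds dense class IDs,
  // so find/join must not be used again.
  unsigned compress() {
    std::vector<unsigned> Root(Parent.size());
    for (unsigned I = 0; I < Parent.size(); ++I)
      Root[I] = find(I);                 // resolve every node first
    std::vector<unsigned> NewID(Parent.size(), ~0u);
    unsigned K = 0;
    for (unsigned I = 0; I < Parent.size(); ++I) {
      if (NewID[Root[I]] == ~0u)
        NewID[Root[I]] = K++;            // first visit names the class
      Parent[I] = NewID[Root[I]];
    }
    return K;                            // class count, cf. getNumClasses()
  }
};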
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+ }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from the given roots.
+void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemented");
+
+ SchedDFSImpl Impl(*this);
+ for (ArrayRef<const SUnit*>::const_iterator
+ RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) {
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(*RootI);
+ DFS.follow(*RootI);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // If the pred is already valid, skip it. We may preorder visit a node
+ // with InstrCount==0 more than once, but it won't affect heuristics
+ // because we don't care about cross edges to leaf copies.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCross(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorder(Child, PredDep, PredDep ? DFS.getCurr() : 0);
+ if (DFS.isComplete())
+ break;
+ }
+ }
+ Impl.finalize();
+}
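
SchedDAGReverseDFS trades recursion for an explicit stack of (node, next-edge)
frames, which is what lets compute() interleave preorder, cross-edge, and
postorder visits. The control skeleton in standalone form, with prints standing
in for the visitor hooks:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

typedef std::vector<std::vector<int>> PredLists; // node -> predecessors

void reverseDFS(const PredLists &P, int Root) {
  std::vector<bool> Visited(P.size(), false);
  // Each frame: the node plus the index of its next unexplored pred edge.
  std::vector<std::pair<int, std::size_t>> Stack;
  Visited[Root] = true;
  std::printf("pre   %d\n", Root);                  // visitPreorder
  Stack.push_back(std::make_pair(Root, 0));
  while (!Stack.empty()) {
    std::pair<int, std::size_t> &Top = Stack.back();
    if (Top.second < P[Top.first].size()) {
      int Pred = P[Top.first][Top.second++];        // advance()
      if (Visited[Pred]) {
        std::printf("cross %d -> %d\n", Pred, Top.first); // visitCross
        continue;
      }
      Visited[Pred] = true;
      std::printf("pre   %d\n", Pred);              // visitPreorder
      Stack.push_back(std::make_pair(Pred, 0));     // follow()
    } else {
      std::printf("post  %d\n", Top.first);         // visitPostorder
      Stack.pop_back();                             // backtrack()
    }
  }
}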
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (SmallVectorImpl<Connection>::const_iterator
+ I = SubtreeConnections[SubtreeID].begin(),
+ E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+ SubtreeConnectLevels[I->TreeID] =
+ std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+ DEBUG(dbgs() << " Tree: " << I->TreeID
+ << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
+ OS << "BADILP";
+ else
+ OS << format("%g", ((double)InstrCount / Length));
+}
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
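
The ILP metric printed here is just instruction count divided by critical-path
length, with a sentinel for a zero-length path. A worked standalone version:

#include <cstdio>

// ILP = instructions / critical-path length; a 0-length path is invalid.
void printILP(unsigned InstrCount, unsigned Length) {
  if (!Length)
    std::printf("%u / %u = BADILP\n", InstrCount, Length);
  else
    std::printf("%u / %u = %g\n", InstrCount, Length,
                (double)InstrCount / Length);
}

// e.g. printILP(12, 4) prints "12 / 4 = 3": on average three instructions
// can issue per step along the critical path.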
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 6e781b1..6c50913 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -11,19 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <fstream>
using namespace llvm;
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 5ca22b2..2cd84d6 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -89,7 +89,7 @@ void ScoreboardHazardRecognizer::Reset() {
ReservedScoreboard.reset();
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << "Scoreboard:\n";
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d7fa009..ff00d0d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18,22 +18,23 @@
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
using namespace llvm;
@@ -194,6 +195,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -269,6 +271,8 @@ namespace {
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -288,6 +292,10 @@ namespace {
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
@@ -300,6 +308,11 @@ namespace {
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return True if some memory operations were changed.
+ bool MergeConsecutiveStores(StoreSDNode *N);
+
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -385,10 +398,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
- // No compile time optimizations on this type.
- if (Op.getValueType() == MVT::ppcf128)
- return 0;
-
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
@@ -1174,7 +1183,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
@@ -1373,6 +1382,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
}
// fold (add x, undef) -> undef
@@ -1616,6 +1631,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
}
// fold (sub x, x) -> 0
@@ -1643,7 +1662,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
- SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+ VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
N1.getOperand(0));
}
@@ -2345,16 +2365,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
- if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeTypes) {
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
EVT In1Ty = In1.getValueType();
- // If both incoming values are integers, and the original types are the same.
+ DebugLoc DL = N->getDebugLoc();
+ // If both incoming values are integers, and the original types are the
+ // same.
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
- SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
- SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
AddToWorkList(Op.getNode());
return BC;
}
@@ -2415,6 +2438,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
}
// fold (and x, undef) -> 0
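
These folds generalize the scalar identities element-wise: an all-zeros
build_vector annihilates AND and an all-ones vector is its identity. A
standalone check over constant vectors, showing which operand each fold
returns, mirroring the N0/N1 choices above:

#include <algorithm>
#include <vector>

static bool allZeros(const std::vector<int> &V) {
  return std::all_of(V.begin(), V.end(), [](int E) { return E == 0; });
}
static bool allOnes(const std::vector<int> &V) {
  return std::all_of(V.begin(), V.end(), [](int E) { return E == -1; });
}

// Fold (and x, 0) -> 0 and (and x, -1) -> x, vector edition.
// Returns the index of the operand to keep (0 or 1), or -1 for no fold.
static int foldVectorAnd(const std::vector<int> &N0,
                         const std::vector<int> &N1) {
  if (allZeros(N0)) return 0; // and 0, x  -> 0
  if (allZeros(N1)) return 1; // and x, 0  -> 0
  if (allOnes(N0))  return 1; // and -1, x -> x
  if (allOnes(N1))  return 0; // and x, -1 -> x
  return -1;
}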
@@ -2598,7 +2633,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ (!LegalOperations ||
+ TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -2758,7 +2794,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
}
-
return SDValue();
}
@@ -2951,7 +2986,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- if (N1.getOpcode() == ISD::OR) {
+ if (N1.getOpcode() == ISD::OR &&
+ N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
// (or (or (and), (and)), (or (and), (and)))
SDValue N000 = N00.getOperand(0);
if (!isBSwapHWordElement(N000, Parts))
@@ -2994,7 +3030,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
- else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3013,6 +3049,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N1;
}
// fold (or x, undef) -> -1
@@ -3095,7 +3143,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ (!LegalOperations ||
+ TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -3212,11 +3261,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if ((LShVal + RShVal) != OpSizeInBits)
return 0;
- SDValue Rot;
- if (HasROTL)
- Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
- else
- Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3249,12 +3295,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTL)
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
}
}
}
@@ -3266,25 +3308,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTR)
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
}
}
// Look for sign/zext/any-extended or truncate cases:
- if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
- (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -3333,6 +3371,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
@@ -3363,7 +3407,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
isInt);
- if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0.getOpcode()) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
@@ -4056,7 +4101,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT.isInteger() &&
(VT0 == MVT::i1 ||
(VT0.isInteger() &&
- TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
+ TLI.getBooleanContents(false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4422,20 +4468,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
- else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
- }
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -5029,11 +5073,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
+  // Because an SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
- // If the load was a sextload then the result is a splat of the sign bit
- // of the extended byte. This is not worth optimizing for.
if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
return SDValue();
}
@@ -5191,6 +5239,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorkList(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -5245,13 +5294,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// if the source is smaller than the dest, we still need an extend
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
N0.getOperand(0));
- else if (N0.getOperand(0).getValueType().bitsGT(VT))
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
     // if the source is larger than the dest, then we just need the truncate
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
- else
- // if the source and dest are the same type, we can drop both the extend
- // and the truncate.
- return N0.getOperand(0);
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
}
// Fold extract-and-trunc into a narrow extract. For example:
@@ -5311,6 +5359,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (Reduced.getNode())
return Reduced;
}
+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+  // where ... are all 'undef'.
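+  // For example, (v4i16 trunc (v4i32 concat (v2i32 undef), (v2i32 x)))
+  // becomes (v4i16 concat (v2i16 undef), (v2i16 trunc x)).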
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (X.getOpcode() != ISD::UNDEF) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+      // Stop if more than one member is non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+ AddToWorkList(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ &Opnds[0], Opnds.size());
+ }
+ }
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
@@ -5348,7 +5438,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
!LD2->isVolatile() &&
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
- unsigned NewAlign = TLI.getTargetData()->
+ unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
@@ -5417,7 +5507,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
!cast<LoadSDNode>(N0)->isVolatile() &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = TLI.getTargetData()->
+ unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
@@ -5440,7 +5530,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
- N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -5663,7 +5754,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// fold (fadd c1, c2) -> c1 + c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
@@ -5674,12 +5765,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
@@ -5691,6 +5782,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+  // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+  // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
// In unsafe math mode, we can fold chains of FADD's of the same value
// into multiplications. This transform is not safe in general because
// we are reducing the number of rounding steps.
@@ -5850,7 +5953,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub c1, c2) -> c1-c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
if (DAG.getTarget().Options.UnsafeFPMath &&
@@ -5942,7 +6045,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// fold (fmul c1, c2) -> c1*c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
@@ -6000,6 +6103,12 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
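+  // fold (fma 0, x, y) -> y and (fma x, 0, y) -> y. This is only valid
+  // under unsafe math; 0 * x is NaN rather than 0 when x is Inf or NaN,
+  // for example.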
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -6079,11 +6188,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// fold (fdiv c1, c2) -> c1/c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
// Compute the reciprocal 1.0 / c2.
APFloat N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -6126,7 +6235,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (frem c1, c2) -> fmod(c1,c2)
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
return SDValue();
@@ -6139,7 +6248,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
if (N1CFP) {
@@ -6189,7 +6298,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128 &&
+ if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6246,7 +6355,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128 &&
+ if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6301,7 +6410,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6314,7 +6423,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
- if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
// fold (fp_round (fp_extend x)) -> x
@@ -6368,7 +6477,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -6409,28 +6518,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector() && !LegalOperations) {
- // If operand is a BUILD_VECTOR node, see if we can constant fold it.
- if (N0.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(ISD::FNEG, N0.getDebugLoc(), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorkList(FoldOp.getNode());
- }
-
- if (Ops.size() == N0.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VT, &Ops[0], Ops.size());
- }
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
}
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
@@ -6474,7 +6564,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6486,7 +6576,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6498,7 +6588,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6509,8 +6599,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (fabs c1) -> fabs(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
@@ -7344,7 +7439,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// start at the previous one.
if (ShAmt % NewBW)
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
- APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
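+    // ShAmt + NewBW can run past the end of the value here, so clamp the
+    // upper bound of the mask to BitWidth.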
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
@@ -7357,7 +7453,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+ if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -7419,7 +7515,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
@@ -7444,6 +7540,477 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+/// Returns the base pointer and an integer offset from it.
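+/// For example, a pointer of the form (add x, 16) yields {x, 16}; any other
+/// pointer yields {Ptr, 0}.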
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
+ if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ SDValue Base = Ptr->getOperand(0);
+ return std::make_pair(Base, Offset);
+ }
+
+ return std::make_pair(Ptr, 0);
+}
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+  MemOpLink(LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+  // The sequence number of this mem node; the lowest mem operand in the
+  // DAG starts at zero.
+ unsigned SequenceNum;
+};
+
+/// Sorts MemOpLink store nodes according to their offset from a shared
+/// base pointer.
+struct ConsecutiveMemoryChainSorter {
+ bool operator()(MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+  // Don't merge vector stores into wider stores.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants or loads.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+ !IsLoadSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 1);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // This holds the base pointer and the offset in bytes from the base pointer.
+ std::pair<SDValue, int64_t> BasePtr =
+ GetPointerBaseAndOffset(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.first.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.first.getOpcode() == ISD::UNDEF)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction of a different kind
+  // or one that has a different base pointer.
+ unsigned Seq = 0;
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ std::pair<SDValue, int64_t> Ptr =
+ GetPointerBaseAndOffset(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (Ptr.first.getNode() != BasePtr.first.getNode())
+ break;
+
+ // Check that the alignment is the same.
+ if (Index->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // No truncation.
+    if (Index->isTruncatingStore())
+      break;
+
+ // The stored memory type must be the same.
+ if (Index->getMemoryVT() != MemVT)
+ break;
+
+    // We do not allow unaligned stores because we want to prevent overwriting
+    // stores.
+ if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+ break;
+
+ // We found a potential memory operand to merge.
+ StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+
+ // Find the next memory operand in the chain. If the next operand in the
+    // chain is a store, then move up and continue the scan with the next
+    // memory operand. If the next operand is a load, save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (1) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ // Save the load node for later. Continue the scan.
+ AliasLoadNodes.push_back(Ldn);
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = NULL;
+ break;
+ }
+ }
+ }
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the base pointer.
+ std::sort(StoreNodes.begin(), StoreNodes.end(),
+ ConsecutiveMemoryChainSorter());
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // store memory address.
+ unsigned LastConsecutiveStore = 0;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ if (i > 0) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ }
+
+ bool Alias = false;
+ // Check if this store interferes with any of the loads that we found.
+ for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+ if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+ Alias = true;
+ break;
+ }
+    // We found a load that aliases with this store. Stop the sequence.
+ if (Alias)
+ break;
+
+ // Mark this node as useful.
+ LastConsecutiveStore = i;
+ }
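+  // For example, with 4-byte elements at offsets 0, 4, 8 and 16, the run
+  // ends at the store at offset 8; the store at offset 16 is not consecutive.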
+
+ // The node with the lowest store address.
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+ // Store the constants into memory as one consecutive store.
+ if (!IsLoadSrc) {
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalType = i+1;
+
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ LastLegalVectorType = i + 1;
+ }
+
+ // We only use vectors if the constant is known to be zero and the
+ // function is not marked with the noimplicitfloat attribute.
+ if (NonZero || (DAG.getMachineFunction().getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)))
+ LastLegalVectorType = 0;
+
+ // Check if we found a legal integer type to store.
+ if (LastLegalType == 0 && LastLegalVectorType == 0)
+ return false;
+
+ bool UseVector = LastLegalVectorType > LastLegalType;
+ unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ unsigned EarliestNodeUsed = 0;
+    for (unsigned i = 0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem; ++i) {
+        unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt <<= ElementSizeBytes * 8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt |= C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          llvm_unreachable("Invalid constant element type");
+        }
+      }
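+      // For example, on a little-endian target, four i8 stores of 0x01,
+      // 0x02, 0x03 and 0x04 at offsets 0..3 pack into the single i32
+      // constant 0x04030201.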
+
+      // Create the merged integer constant for the new store.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+    for (unsigned i = 0; i < NumElem; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+      // When we change its chain to be St's chain, they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
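+  // For example, two i32 stores whose values are two consecutive i32 loads
+  // can become a single i64 (or v2i32) load feeding a single i64 (or v2i32)
+  // store, when such a wide type is legal.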
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+  // Find acceptable loads. Loads need to have the same chain (token factor),
+  // must not be extending, volatile, or indexed, and they must be consecutive.
+ SDValue LdBasePtr;
+  for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld) break;
+
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
+
+ // Check that the alignment is the same as the stores.
+ if (Ld->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
+
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
+
+ std::pair<SDValue, int64_t> LdPtr =
+ GetPointerBaseAndOffset(Ld->getBasePtr());
+
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.getNode()) {
+ // The base ptr must be the same.
+ if (LdPtr.first != LdBasePtr)
+ break;
+ } else {
+      // Record the base pointer; all later loads must match it.
+ LdBasePtr = LdPtr.first;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+ }
+
+ if (LoadNodes.size() < 2)
+ return false;
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // load memory address. These variables hold the index in the store node
+ // array.
+ unsigned LastConsecutiveLoad = 0;
+  // These variables refer to a size rather than an index in the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+    // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalVectorType = i + 1;
+
+ // Find a legal type for the integer store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalIntegerType = i + 1;
+ }
+
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+ unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+  // We add +1 here because the LastXXX variables refer to a position in the
+  // array, while NumElem refers to the number of elements.
+ unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ NumElem = std::min(LastLegalType, NumElem);
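+  // For example, six consecutive stores fed by only four consecutive loads
+  // can merge at most four elements, further capped by the largest legal
+  // type.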
+
+ if (NumElem < 2)
+ return false;
+
+ // The earliest Node in the DAG.
+ unsigned EarliestNodeUsed = 0;
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+  for (unsigned i = 1; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+  // Decide whether it is better to use a vector type or an integer type for
+  // the merged load and store.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ }
+
+ DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+ DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+ FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(),
+ false, false, false,
+ FirstLoad->getAlignment());
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false,
+ FirstInChain->getAlignment());
+
+  // Replace uses of the first load's chain with the new load's chain.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+
+ // Remove the rest of the load chains.
+  for (unsigned i = 1; i < NumElem; ++i) {
+ // Replace all chain users of the old load nodes with the chain of the new
+ // load node.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ }
+
+ // Replace the first store with the new store.
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+  for (unsigned i = 0; i < NumElem; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -7456,7 +8023,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = TLI.getTargetData()->
+ unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
@@ -7645,6 +8212,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->getAlignment());
}
+ // Only perform this optimization before the types are legal, because we
+ // don't want to perform this optimization on every DAGCombine invocation.
+ if (!LegalTypes) {
+ bool EverChanged = false;
+
+ do {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ EverChanged |= Changed;
+ if (!Changed) break;
+ } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+ if (EverChanged)
+ return SDValue(N, 0);
+ }
+
return ReduceLoadOpStoreWidth(N);
}
@@ -7723,9 +8308,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
- // we may introduce new vector instructions which are not backed by TD patterns.
- // For example on AVX, extracting elements from a wide vector without using
- // extract_subvector.
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example on AVX, extracting elements from a wide vector
+ // without using extract_subvector.
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
&& ConstEltNo && !LegalOperations) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
@@ -7844,7 +8429,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()
+ TLI.getDataLayout()
->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
@@ -7909,15 +8494,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+// Simplify (build_vec (ext x)) to (bitcast (build_vec x)).
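+// For example, (v4i32 build_vec (zext i16 a), ..., (zext i16 d)) can become
+// (bitcast (v8i16 build_vec a, 0, b, 0, c, 0, d, 0)) on a little-endian
+// target.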
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
-
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7960,64 +8551,142 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
- EVT OutScalarTy = N->getValueType(0).getScalarType();
+ EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
- // We perform this optimization post type-legalization because
- // the type-legalizer often scalarizes integer-promoted vectors.
- // Performing this optimization before may create bit-casts which
- // will be type-legalized to complex code sequences.
- // We perform this optimization only before the operation legalizer because we
- // may introduce illegal operations.
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
- if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
- ValidTypes) {
- bool isLE = TLI.isLittleEndian();
- unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
- assert(ElemRatio > 1 && "Invalid element size ratio");
- SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
-
- unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
-
- // Populate the new build_vector
- for (unsigned i=0; i < N->getNumOperands(); ++i) {
- SDValue Cast = N->getOperand(i);
- assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
- Cast.getOpcode() == ISD::ZERO_EXTEND ||
- Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
- SDValue In;
- if (Cast.getOpcode() == ISD::UNDEF)
- In = DAG.getUNDEF(SourceType);
- else
- In = Cast->getOperand(0);
- unsigned Index = isLE ? (i * ElemRatio) :
- (i * ElemRatio + (ElemRatio - 1));
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
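+// Simplify (build_vec (conv x, ...)) to (conv (build_vec x, ...)), where
+// conv is uint_to_fp or sint_to_fp. For example, (v2f32 build_vec
+// (sint_to_fp a), (sint_to_fp b)) becomes (v2f32 sint_to_fp (v2i32
+// build_vec a, b)) when the vector conversion is legal or custom.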
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
- assert(Index < Ops.size() && "Invalid index");
- Ops[Index] = In;
+    // All defined scalar values must be floats converted from integers;
+    // record the conversion opcode from the first one we see.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
}
- // The type of the new BUILD_VECTOR node.
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
- assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
- "Invalid vector size");
- // Check if the new vector type is legal.
- if (!isTypeLegal(VecVT)) return SDValue();
+ if (Opc != Opcode)
+ return SDValue();
+
+ EVT InVT = In.getOperand(0).getValueType();
- // Make the new BUILD_VECTOR.
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VecVT, &Ops[0], Ops.size());
+    // All source scalars must share the same type; record it from the first
+    // one and bail out on any mismatch. This keeps the new BUILD_VECTOR a
+    // simple integer-typed node.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+  // If the vector has just one defined element, it's not worth folding it
+  // into a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
+
+  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) &&
+         "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
- // The new BUILD_VECTOR node has the potential to be further optimized.
- AddToWorkList(BV.getNode());
- // Bitcast to the desired type.
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ if (In.getOpcode() == ISD::UNDEF)
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
}
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ &Opnds[0], Opnds.size());
+ AddToWorkList(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ SDValue V = reduceBuildVecExtToExtBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ V = reduceBuildVecConvertToConvertBuildVec(N);
+ if (V.getNode())
+ return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -8102,14 +8771,15 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
// Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
- // Check that we were able to transform all incoming values to the same type.
+ // Check that we were able to transform all incoming values to the same
+ // type.
if (VecIn2.getValueType() != VecIn1.getValueType() ||
VecIn1.getValueType() != VT)
return SDValue();
@@ -8122,7 +8792,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SDValue Ops[2];
Ops[0] = VecIn1;
Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
}
return SDValue();
@@ -8158,8 +8828,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants with the same type.
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -8176,6 +8846,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+    // Only operand 0 is checked because 'concat' requires all of its inputs
+    // to have the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+    unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
return SDValue();
}
@@ -8491,6 +9176,44 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
+/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
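+/// For example, (fneg (build_vector 1.0, -2.0)) constant-folds to
+/// (build_vector -1.0, 2.0) before operation legalization.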
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+  // After legalize, the target may be depending on ops like these to
+  // provide legal ways to construct constants or other things. Simplifying
+  // them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVUnaryOp only works on vectors!");
+
+ SDValue N0 = N->getOperand(0);
+
+ if (N0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.getOpcode() != ISD::UNDEF &&
+ Op.getOpcode() != ISD::ConstantFP)
+ break;
+ EVT EltVT = Op.getValueType();
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() != N0.getNumOperands())
+ return SDValue();
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ N0.getValueType(), &Ops[0], Ops.size());
+}
+
SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
@@ -8574,6 +9297,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
(RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
@@ -8693,7 +9420,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
Type *FPTy = Elts[0]->getType();
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
@@ -8808,34 +9535,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
return SDValue();
// Get a SetCC of the condition
- // FIXME: Should probably make sure that setcc is legal if we ever have a
- // target where it isn't.
- SDValue Temp, SCC;
- // cast from setcc result type to select result type
- if (LegalTypes) {
- SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- if (N2.getValueType().bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
- else
+ // NOTE: Don't create a SETCC if it's not legal on this target.
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC,
+ LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+ N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
N2.getValueType(), SCC);
- } else {
- SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
- Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
- N2.getValueType(), SCC);
- }
+ }
- AddToWorkList(SCC.getNode());
- AddToWorkList(Temp.getNode());
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
- if (N2C->getAPIntValue() == 1)
- return Temp;
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
- // shl setcc result by log2 n2c
- return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- getShiftAmountTy(Temp.getValueType())));
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
}
// Check to see if this is the equivalent of setcc
@@ -8918,7 +9649,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans) {
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
@@ -9061,6 +9792,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
return true;
}
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
+ SDValue Ptr0, Ptr1;
+ int64_t Size0, Size1;
+ const Value *SrcValue0, *SrcValue1;
+ int SrcValueOffset0, SrcValueOffset1;
+ unsigned SrcValueAlign0, SrcValueAlign1;
+ const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
+ FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0);
+ FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+ return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0,
+ Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+}
+
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 683fac6..0d90a07 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,27 +40,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
@@ -737,15 +737,15 @@ bool FastISel::SelectBitCast(const User *I) {
}
// Bitcasts of other values become reg-reg copies or BITCAST operators.
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
-
- if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
- DstVT == MVT::Other || !DstVT.isSimple() ||
- !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
// Unhandled type. Halt "fast" selection and bail.
return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
unsigned Op0 = getRegForValue(I->getOperand(0));
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
@@ -755,7 +755,7 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
- if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ if (SrcVT == DstVT) {
const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
@@ -768,8 +768,7 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BITCAST, Op0, Op0IsKill);
+ ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
if (!ResultReg)
return false;
@@ -1059,7 +1058,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
MFI(*FuncInfo.MF->getFrameInfo()),
MCP(*FuncInfo.MF->getConstantPool()),
TM(FuncInfo.MF->getTarget()),
- TD(*TM.getTargetData()),
+ TD(*TM.getDataLayout()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
TRI(*TM.getRegisterInfo()),
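The SelectBitCast change above tightens the EVT handling: legality is now checked on the extended type first, and only then is it narrowed to MVT, since a legal type is always a simple type. A minimal sketch of the pattern, assuming a TargetLowering reference TLI and an IR value V (both names illustrative, not from the patch):

  EVT VT = TLI.getValueType(V->getType());
  if (VT == MVT::Other || !TLI.isTypeLegal(VT))
    return false;                    // Bail out before narrowing.
  MVT SimpleVT = VT.getSimpleVT();   // Safe: legal types are simple.
  const TargetRegisterClass *RC = TLI.getRegClassFor(SimpleVT);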
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b2a2a5c..b46edad 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,29 +13,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -66,8 +66,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(Fn->getReturnType(),
- Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
Fn->isVarArg(),
Outs, Fn->getContext());
@@ -80,9 +79,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
@@ -208,7 +207,7 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
@@ -226,7 +225,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
- EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+ MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
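The header reshuffle and the getDataLayout() calls above are part of the mechanical TargetData -> DataLayout rename threaded through this commit: the header moves to llvm/IR/DataLayout.h and the accessor is renamed, while the query surface stays the same. A sketch of the before/after, assuming a TargetLowering reference TLI and a Type *Ty:

  #include "llvm/IR/DataLayout.h"                 // was llvm/Target/TargetData.h
  const DataLayout *DL = TLI.getDataLayout();     // was TLI.getTargetData()
  uint64_t Size  = DL->getTypeAllocSize(Ty);      // unchanged
  unsigned Align = DL->getPrefTypeAlignment(Ty);  // unchanged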
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 6d2cdea..3b1abd7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -16,18 +16,18 @@
#define DEBUG_TYPE "instr-emitter"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
/// MinRCSize - Smallest register class we allow when constraining virtual
@@ -99,7 +99,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = NULL;
- EVT VT = Node->getValueType(ResNo);
+ MVT VT = Node->getSimpleValueType(ResNo);
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
@@ -124,7 +124,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
SDValue Op = User->getOperand(i);
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
- EVT VT = Node->getValueType(Op.getResNo());
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
@@ -203,7 +203,8 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
return 0;
}
-void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
@@ -222,7 +223,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
unsigned NumResults = CountResults(Node);
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MIB.addReg(VRBase, RegState::Define);
}
if (!VRBase && !IsClone && !IsCloned)
@@ -237,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
- MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ MIB.addReg(VRBase, RegState::Define);
break;
}
}
@@ -249,7 +250,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
if (VRBase == 0) {
assert(RC && "Isn't a register operand!");
VRBase = MRI->createVirtualRegister(RC);
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MIB.addReg(VRBase, RegState::Define);
}
SDValue Op(Node, i);
@@ -272,7 +273,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
if (!VReg) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
VReg = MRI->createVirtualRegister(RC);
}
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -290,7 +292,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
/// specified machine instr. Insert register copies if the register is
/// not in the required register class.
void
-InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -302,7 +305,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
MCID.OpInfo[IIOpNum].isOptionalDef();
@@ -314,8 +317,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
- assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
- "Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
@@ -336,66 +337,63 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
!IsDebug &&
!(IsClone || IsCloned);
if (isKill) {
- unsigned Idx = MI->getNumOperands();
+ unsigned Idx = MIB->getNumOperands();
while (Idx > 0 &&
- MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
--Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
if (isTied)
isKill = false;
}
- MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
- false/*isImp*/, isKill,
- false/*isDead*/, false/*isUndef*/,
- false/*isEarlyClobber*/,
- 0/*SubReg*/, IsDebug));
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
}
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
/// operand number (in the II) that we are adding.
-void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ MIB.addImm(C->getSExtValue());
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
- const ConstantFP *CFP = F->getConstantFPValue();
- MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
// Turn additional physreg operands into implicit uses on non-variadic
// instructions. This is used by call and return instructions passing
// arguments in registers.
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
- MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp));
+ MIB.addReg(R->getReg(), getImplRegState(Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
+ MIB.addRegMask(RM->getRegMask());
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
- TGA->getTargetFlags()));
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
} else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ MIB.addMBB(BBNode->getBasicBlock());
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ MIB.addFrameIndex(FI->getIndex());
} else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
- JT->getTargetFlags()));
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
unsigned Align = CP->getAlignment();
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = TM->getTargetData()->getTypeAllocSize(Type);
+ Align = TM->getDataLayout()->getTypeAllocSize(Type);
}
}
@@ -405,29 +403,26 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
else
Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
- MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
- CP->getTargetFlags()));
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
} else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
- ES->getTargetFlags()));
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
- BA->getTargetFlags()));
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
} else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
- TI->getOffset(),
- TI->getTargetFlags()));
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
"Chain and glue operands should occur at end of operand list!");
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- EVT VT, DebugLoc DL) {
+ MVT VT, DebugLoc DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -478,7 +473,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// constraints on the %dst register, COPY can target all legal register
// classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0));
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
MachineInstr *DefMI = MRI->getVRegDef(VReg);
@@ -501,7 +497,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// constrain its register class or issue a COPY to a compatible register
// class.
VReg = ConstrainForSubReg(VReg, SubIdx,
- Node->getOperand(0).getValueType(),
+ Node->getOperand(0).getSimpleValueType(),
Node->getDebugLoc());
// Create the destreg if it is missing.
@@ -533,7 +529,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
//
// There is no constraint on the %src register class.
//
- const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0));
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
@@ -541,22 +537,22 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(SRC);
// Create the insert_subreg or subreg_to_reg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
// If creating a subreg_to_reg, then the first input operand
// is an implicit value immediate, otherwise it's a register
if (Opc == TargetOpcode::SUBREG_TO_REG) {
const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
- MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ MIB.addImm(SD->getZExtValue());
} else
- AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
// Add the subregister being inserted
- AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- MI->addOperand(MachineOperand::CreateImm(SubIdx));
- MBB->insert(InsertPos, MI);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
} else
llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
@@ -597,12 +593,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
assert((NumOps & 1) == 1 &&
"REG_SEQUENCE must have an odd number of operands!");
- const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
@@ -621,11 +616,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
}
}
}
- AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
}
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
(void)isNew; // Silence compiler warning.
@@ -662,7 +657,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
if (I==VRBaseMap.end())
MIB.addReg(0U); // undef
else
- AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
@@ -738,12 +733,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
#endif
// Create the new machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
- CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
@@ -752,17 +747,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Unable to cope with optional defs and phys regs defs!");
unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
- AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
// Transfer all of the memory reference descriptions of this instruction.
- MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
// hook knows where in the block to insert the replacement code.
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
// The MachineInstr may also define physregs instead of virtregs. These
// physreg values can reach other instructions in different ways:
@@ -820,13 +815,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Finally mark unused registers as dead.
if (!UsedRegs.empty() || II.getImplicitDefs())
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
if (II.hasPostISelHook())
#endif
- TLI->AdjustInstrPostInstrSelection(MI, Node);
+ TLI->AdjustInstrPostInstrSelection(MIB, Node);
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
@@ -890,19 +885,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
--NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::INLINEASM));
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
// Add the asm string as an external symbol operand.
SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
- MI->addOperand(MachineOperand::CreateES(AsmStr));
+ MIB.addExternalSymbol(AsmStr);
- // Add the HasSideEffect and isAlignStack bits.
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
int64_t ExtraInfo =
cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
- MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+ MIB.addImm(ExtraInfo);
// Remember the operand index of the group flags.
SmallVector<unsigned, 8> GroupIdx;
@@ -913,8 +909,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
- GroupIdx.push_back(MI->getNumOperands());
- MI->addOperand(MachineOperand::CreateImm(Flags));
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
@@ -925,20 +921,16 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MI->addOperand(MachineOperand::CreateReg(Reg, true,
- /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg, RegState::Define |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
- /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
- /*isKill=*/ false,
- /*isDead=*/ false,
- /*isUndef=*/false,
- /*isEarlyClobber=*/ true));
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
@@ -947,7 +939,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
- AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap,
/*IsDebug=*/false, IsClone, IsCloned);
// Manually set isTied bits.
@@ -957,7 +949,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned DefIdx = GroupIdx[DefGroup] + 1;
unsigned UseIdx = GroupIdx.back() + 1;
for (unsigned j = 0; j != NumVals; ++j)
- MI->tieOperands(DefIdx + j, UseIdx + j);
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
}
}
break;
@@ -968,9 +960,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
if (MD)
- MI->addOperand(MachineOperand::CreateMetadata(MD));
+ MIB.addMetadata(MD);
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
break;
}
}
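The InstrEmitter rewrite above consistently replaces hand-built MachineOperand::Create* calls with the MachineInstrBuilder interface, which folds the def/implicit/kill/debug flags into RegState masks and converts implicitly to MachineInstr* where one is needed. A rough before/after sketch (the opcode and operand names are illustrative):

  // Before: construct the instruction, then append raw operands.
  MachineInstr *MI = BuildMI(*MF, DL, TII->get(Opc));
  MI->addOperand(MachineOperand::CreateReg(DstReg, /*isDef=*/true));
  MI->addOperand(MachineOperand::CreateImm(Imm));
  MBB->insert(InsertPos, MI);

  // After: chain operands through the builder; MIB converts back to
  // MachineInstr* at the insert call.
  MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(Opc));
  MIB.addReg(DstReg, RegState::Define).addImm(Imm);
  MBB->insert(InsertPos, MIB);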
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 9eddee9..a9c2203 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -16,12 +16,13 @@
#ifndef INSTREMITTER_H
#define INSTREMITTER_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
+class MachineInstrBuilder;
class MCInstrDesc;
class SDDbgValue;
@@ -48,7 +49,8 @@ class InstrEmitter {
unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
unsigned ResNo) const;
- void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
@@ -61,7 +63,8 @@ class InstrEmitter {
/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
/// not in the required register class.
- void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -71,7 +74,8 @@ class InstrEmitter {
/// specifies the instruction information for the node, and IIOpNum is the
/// operand number (in the II) that we are adding. IIOpNum and II are used for
/// assertions only.
- void AddOperand(MachineInstr *MI, SDValue Op,
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -81,7 +85,7 @@ class InstrEmitter {
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- EVT VT, DebugLoc DL);
+ MVT VT, DebugLoc DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7b34170..5eaf67e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,26 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -101,7 +102,7 @@ private:
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_PPCF128);
+ RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
@@ -321,7 +322,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
EVT StoredVT = ST->getMemoryVT();
- EVT RegVT =
+ MVT RegVT =
TLI.getRegisterType(*DAG.getContext(),
EVT::getIntegerVT(*DAG.getContext(),
StoredVT.getSizeInBits()));
@@ -447,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Copy the value to a (aligned) stack slot using (unaligned) integer
// loads and stores, then do a (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
@@ -710,7 +711,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
{
SDValue Value = ST->getValue();
- EVT VT = Value.getValueType();
+ MVT VT = Value.getSimpleValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -718,7 +719,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node),
DAG, TLI, this);
@@ -731,9 +732,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
return;
}
case TargetLowering::Promote: {
- assert(VT.isVector() && "Unknown legal promote case!");
- Value = DAG.getNode(ISD::BITCAST, dl,
- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
@@ -817,14 +819,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
ReplaceNode(SDValue(Node, 0), Result);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
+ StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
@@ -862,38 +865,36 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
- EVT VT = Node->getValueType(0);
+ MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, RVal, RChain);
- }
- }
- break;
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(RVal, DAG);
- if (Res.getNode()) {
- RVal = Res;
- RChain = Res.getValue(1);
- }
- break;
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
}
case TargetLowering::Promote: {
- // Only promote a load of vector type to another.
- assert(VT.isVector() && "Cannot promote this load!");
- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
@@ -1038,7 +1039,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Ch;
} else {
bool isCustom = false;
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
@@ -1060,7 +1061,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node),
DAG, TLI, Value, Chain);
@@ -1185,7 +1186,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
- EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
@@ -1241,6 +1242,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // Replace ISD::DEBUGTRAP with ISD::TRAP.
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1579,7 +1593,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
DebugLoc dl) {
- EVT OpVT = LHS.getValueType();
+ MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
@@ -1588,26 +1602,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
break;
case TargetLowering::Expand: {
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- // FIXME: Implement more expansions.
- }
-
- SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fall through if this is an unsigned integer comparison.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // We only support expanding these cases by using the inverted
+ // operation; no other expansion strategy is implemented.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
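The expansion above relies on the layout of the ISD::CondCode enumeration: the unordered predicates occupy values 8-15 (bit 3 set) and the ordering-agnostic integer predicates occupy 16-23 (bit 4 set). A worked example under that assumption:

  // SETUGT = 10 = 0b01010
  // CC2 = (10 & 0x8) ? SETUO : SETO        -> SETUO
  // Opc = (10 & 0x8) ? ISD::OR : ISD::AND  -> ISD::OR
  // CC1 = (10 & 0x7) | 0x10 = 2 | 16 = 18  -> SETGT
  // So SETUGT expands to (LHS SETGT RHS) OR (LHS SETUO RHS).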
@@ -1626,7 +1685,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
DebugLoc dl) {
// Create the stack frame object.
unsigned SrcAlign =
- TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
getTypeForEVT(*DAG.getContext()));
SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
@@ -1638,7 +1697,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
- unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+ unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
// larger than DestVT.
@@ -1782,6 +1841,26 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
+static bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain, const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (AttrBuilder(CallerRetAttr)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
+ CallerRetAttr.hasAttribute(Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node, Chain);
+}
+
// ExpandLibCall - Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
@@ -1899,6 +1978,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
@@ -1906,6 +1986,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
case MVT::ppcf128: LC = Call_PPCF128; break;
}
return ExpandLibCall(LC, Node, false);
@@ -2787,7 +2868,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(TLI.getTargetData()->
+ DAG.getConstant(TLI.getDataLayout()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
@@ -2975,77 +3056,95 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
break;
case ISD::FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
break;
case ISD::FCOS:
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
break;
case ISD::FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
break;
case ISD::FLOG2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
break;
case ISD::FRINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
break;
case ISD::FNEARBYINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
break;
case ISD::FPOW:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
break;
case ISD::FREM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
break;
case ISD::FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
@@ -3109,6 +3208,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
@@ -3366,7 +3467,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT PTy = TLI.getPointerTy();
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
@@ -3516,13 +3617,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
- EVT OVT = Node->getValueType(0);
+ MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
Node->getOpcode() == ISD::SETCC) {
- OVT = Node->getOperand(0).getValueType();
+ OVT = Node->getOperand(0).getSimpleValueType();
}
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e393896..92dc5a9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
- == TargetLowering::Custom)
- Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
- if (Res.getNode() == 0) {
- switch (N->getOpcode()) {
- default:
- #ifndef NDEBUG
- dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n";
- #endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
-
- case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
- case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
- case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
- case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
- case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
- case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
- case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
- case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
- OpNo); break;
- }
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e8e968a..5e33ef1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,7 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
EVT SmallVT = LHS.getValueType();
// To determine if the result overflowed in a larger type, we extend the
- // input to the larger type, do the multiply, then check the high bits of
- // the result to see if the overflow happened.
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
if (N->getOpcode() == ISD::SMULO) {
LHS = SExtPromotedInteger(LHS);
RHS = SExtPromotedInteger(RHS);
@@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
LHS = ZExtPromotedInteger(LHS);
RHS = ZExtPromotedInteger(RHS);
}
- SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
- // Overflow occurred iff the high part of the result does not
- // zero/sign-extend the low part.
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
- // Unsigned overflow occurred iff the high part is non-zero.
+ // Unsigned overflow occurred if the high part is non-zero.
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
} else {
- // Signed overflow occurred iff the high part does not sign extend the low.
+ // Signed overflow occurred if the high part does not sign extend the low.
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
Mul, DAG.getValueType(SmallVT));
Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
}
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Overflow);
return Mul;
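A concrete trace of the two-part check above, written as plain C arithmetic for illustration (an i8 umulo promoted to i16):

  uint8_t  a = 200, b = 2;
  uint16_t wide = (uint16_t)a * (uint16_t)b; // 400; the i16 multiply is exact
  uint16_t hi   = wide >> 8;                 // high byte is 1, i.e. non-zero
  bool     ov   = (hi != 0);                 // so the i8 umulo overflowed

The OR with the promoted node's own overflow bit covers the remaining case: when the promoted type is less than twice the width of the original, the wide multiply can itself overflow, which a plain ISD::MUL would silently miss.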
@@ -695,7 +703,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
// The argument is passed as NumRegs registers of type RegVT.
@@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
- Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
- EVT PtrVT = TLI.getPointerTy();
- Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
DebugLoc dl = N->getDebugLoc();
// A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
- SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(MUL, Lo, Hi);
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETNE);
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
DAG.getConstant(1, VT), RHS);
- SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
- SDValue Overflow;
- Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+ ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)),
+ Overflow);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
// Replace this with a libcall that will check overflow.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
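The SETNE -> SETEQ flip and the extra SELECT above fix a miscomputation when RHS is zero; a worked trace with illustrative values:

  // RHS = 0, LHS = 7:
  //   MUL     = 7 * 0 = 0
  //   isZero  = (RHS == 0) = true           (the old SETNE picked the wrong
  //                                          select arm here)
  //   NotZero = select(isZero, 1, RHS) = 1
  //   DIV     = 0 / 1 = 0
  //   old: Overflow = (DIV != LHS) = true   // wrong, 7 * 0 cannot overflow
  //   new: Overflow = select(isZero, 0, DIV != LHS) = false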
@@ -2538,7 +2549,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
- TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
SDValue Tmp1, Tmp2;
Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 39337ff..6aea2d8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 37f0e60..8c53ba3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -17,12 +17,12 @@
#define SELECTIONDAG_LEGALIZETYPES_H
#define DEBUG_TYPE "legalize-types"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -578,6 +578,7 @@ private:
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
@@ -634,7 +635,7 @@ private:
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
- bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 06f6bd6..222d1c0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -20,7 +20,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/DataLayout.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (InVT.isVector() && OutVT.isInteger()) {
// Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+      // If the element size is smaller than a byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
if (isTypeLegal(NVT)) {
SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
- Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
- DAG.getIntPtrConstant(1));
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getIntPtrConstant(i)));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(
+ *DAG.getContext(),
+ LHS.getValueType().getSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
if (TLI.isBigEndian())
std::swap(Lo, Hi);
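
The loop above keeps halving the element type (and doubling the element count) until it finds a legal vector type to bitcast to, then reduces the extracted elements pairwise with BUILD_PAIR until only Lo and Hi remain. A sketch of that reduction on plain integers (hypothetical helper, not part of this patch; little-endian order, and it assumes each combined half fits in 64 bits):

#include <cstdint>
#include <vector>

// Sketch of the BUILD_PAIR reduction: each step consumes two equally wide
// values and appends one twice as wide, so the worklist shrinks by one per
// step until exactly Lo and Hi are left. Vals is taken by value on purpose;
// the loop mutates its own copy as a worklist.
static void pairUp(std::vector<uint64_t> Vals, unsigned EltBits,
                   uint64_t &Lo, uint64_t &Hi) {
  std::vector<unsigned> Bits(Vals.size(), EltBits); // width of each entry
  unsigned Slot = 0;
  for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
    Vals.push_back(Vals[Slot] | (Vals[Slot + 1] << Bits[Slot]));
    Bits.push_back(Bits[Slot] * 2);
  }
  Lo = Vals[Slot];
  Hi = Vals[Slot + 1];
}
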
@@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+ TLI.getDataLayout()->getPrefTypeAlignment(NOutVT.
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22f8d51..de6bbe3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -142,9 +142,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
- EVT ValVT = ST->getValue().getValueType();
+ MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
- switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
@@ -221,6 +221,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
@@ -291,10 +293,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
// Vector "promotion" is basically just bitcasting and doing the operation
// in a different type. For example, x86 promotes ISD::AND on v2i32 to
// v1i64.
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
- EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4095728..09a50d9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(VecType);
+ TLI.getDataLayout()->getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
@@ -1030,7 +1030,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
-
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1064,6 +1066,58 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would have handled this node already otherwise.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+         "Lo and Hi have differing types");
+
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+ assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+ EVT Src0VT = Src0.getValueType();
+ EVT Src0EltTy = Src0VT.getVectorElementType();
+ EVT MaskEltTy = MaskVT.getVectorElementType();
+
+ EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+ EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+ EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+ EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+ SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+ SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+ SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+ SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+ SDValue LoMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+ SDValue HiMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
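
SplitVecOp_VSELECT only fires when the mask (operand 0) is the illegal operand; it slices the mask and both sources at element LoNumElts with EXTRACT_SUBVECTOR, selects each half, and concatenates the results. The split is sound because element i of a VSELECT depends only on element i of each input, so selecting the halves independently changes nothing. A plain-array model of that invariant (hypothetical sketch, not DAG code):

#include <array>
#include <cstddef>

// Sketch: elementwise select. Because each output element reads only
// index i of Mask/A/B, selecting the two halves separately and then
// concatenating gives an identical result.
template <typename T, std::size_t N>
std::array<T, N> vselect(const std::array<bool, N> &Mask,
                         const std::array<T, N> &A,
                         const std::array<T, N> &B) {
  std::array<T, N> R{};
  for (std::size_t i = 0; i != N; ++i)
    R[i] = Mask[i] ? A[i] : B[i];
  return R;
}
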
@@ -2082,16 +2136,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index c3794d5..473e138 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -21,13 +21,13 @@
#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -94,9 +94,9 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
continue;
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
- EVT VT = ScegN->getValueType(i);
+ MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
- && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
@@ -132,9 +132,9 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)
- && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
@@ -332,7 +332,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
// Gen estimate.
for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
- EVT VT = SU->getNode()->getValueType(i);
+ MVT VT = SU->getNode()->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
&& TLI->getRegClassFor(VT)
&& TLI->getRegClassFor(VT)->getID() == RCId)
@@ -341,7 +341,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
// Kill estimate.
for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
const SDValue &Op = SU->getNode()->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (isa<ConstantSDNode>(Op.getNode()))
continue;
@@ -485,7 +485,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
if (ScegN->isMachineOpcode()) {
// Estimate generated regs.
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
- EVT VT = ScegN->getValueType(i);
+ MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
@@ -496,7 +496,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
// Estimate killed regs.
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
@@ -604,10 +604,8 @@ SUnit *ResourcePriorityQueue::pop() {
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
signed BestCost = SUSchedulingCost(*Best);
- for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I) {
- if (*I == *Best)
- continue;
if (SUSchedulingCost(*I) > BestCost) {
BestCost = SUSchedulingCost(*I);
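
The pop() change above replaces the "skip the seed inside the loop" pattern with the usual argmax idiom: seed the best candidate with the first element and start scanning at the second. A generic sketch of the idiom (hypothetical helper, not part of this patch):

#include <cstddef>
#include <vector>

// Sketch: seed with element 0, scan from element 1; no per-iteration
// comparison against the seed is needed.
static std::size_t pickMaxIndex(const std::vector<int> &Cost) {
  std::size_t BestIdx = 0;
  for (std::size_t i = 1, e = Cost.size(); i != e; ++i)
    if (Cost[i] > Cost[BestIdx])
      BestIdx = i;
  return BestIdx;
}
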
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 2dcb229..4af7172 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -15,8 +15,8 @@
#define LLVM_CODEGEN_SDNODEDBGVALUE_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index f88b26d..d2269f8 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -28,8 +28,8 @@ class SDNode;
class SDNodeOrdering {
DenseMap<const SDNode*, unsigned> OrderMap;
- void operator=(const SDNodeOrdering&); // Do not implement.
- SDNodeOrdering(const SDNodeOrdering&); // Do not implement.
+ void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+ SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
public:
SDNodeOrdering() {}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b7ce48a..d1f36cb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -12,19 +12,20 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumUnfolds, "Number of nodes unfolded");
@@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
namespace {
/// FastPriorityQueue - A degenerate priority queue that considers
@@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
}
if (isNewLoad) {
- AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
}
++NumUnfolds;
@@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
RemovePred(DelDeps[i].first, DelDeps[i].second);
}
-
- AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
- AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU);
@@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
#endif
}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No scheduling scheduler; it simply linearizes the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
return new ScheduleDAGFast(*IS->MF);
}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
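
The new "linearize" scheduler avoids a priority queue entirely: it seeds each node's id with its use count, schedules the root, and recursively releases operands as their last user is scheduled, then emits the sequence back to front. Ignoring the glue special-casing, the core is a reverse topological walk; a sketch on a hypothetical generic DAG node type (not part of this patch):

#include <cstddef>
#include <vector>

// Sketch: degree-counting linearization. UsesLeft must be seeded with each
// node's use count before the first call.
struct Node {
  std::vector<Node *> Ops;
  unsigned UsesLeft;
};

static void linearize(Node *N, std::vector<Node *> &Seq) {
  Seq.push_back(N);                     // users precede their operands here
  for (std::size_t i = 0, e = N->Ops.size(); i != e; ++i)
    if (--N->Ops[i]->UsesLeft == 0)     // this was the operand's last user
      linearize(N->Ops[i], Seq);
}
// Emission then walks Seq in reverse, so every operand is emitted before
// any of its users -- a valid topological order.
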
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 2b86e36..31b9bf3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -16,22 +16,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -156,7 +156,7 @@ public:
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
- Topo(SUnits) {
+ Topo(SUnits, NULL) {
const TargetMachine &tm = mf.getTarget();
if (DisableSchedCycles || !NeedLatency)
@@ -268,7 +268,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetRegisterInfo *TRI,
unsigned &RegClass, unsigned &Cost,
const MachineFunction &MF) {
- EVT VT = RegDefPos.GetValue();
+ MVT VT = RegDefPos.GetValue();
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
@@ -1058,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
- AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
@@ -1140,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
// Avoid scheduling the def-side copy before other successors. Otherwise
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
- SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true);
- AddPred(SuccSU, D);
+ AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
- AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
- AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(CopyFromSU);
@@ -1359,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
- AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
    // node is no longer available. Schedule a successor that's now
@@ -1413,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -1758,7 +1753,7 @@ public:
return V;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump(ScheduleDAG *DAG) const {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
@@ -1897,7 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
//===----------------------------------------------------------------------===//
void RegReductionPQBase::dumpRegPressure() const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
E = TRI->regclass_end(); I != E; ++I) {
const TargetRegisterClass *RC = *I;
@@ -1944,7 +1939,7 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -1978,7 +1973,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
}
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
- EVT VT = RegDefPos.GetValue();
+ MVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] >= RegLimit[RCId])
++PDiff;
@@ -1991,7 +1986,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -2102,7 +2097,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
+ MVT VT = PN->getSimpleValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
@@ -2114,14 +2109,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (POpc == TargetOpcode::EXTRACT_SUBREG ||
POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
+ MVT VT = PN->getSimpleValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
+ MVT VT = PN->getSimpleValueType(i);
if (!PN->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -2138,7 +2133,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (SU->NumSuccs && N->isMachineOpcode()) {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
@@ -2936,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
- scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 222dc55..b22440d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -13,26 +13,26 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "SDNodeDbgValue.h"
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
@@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if(isChain && OpN->getOpcode() == ISD::TokenFactor)
OpLatency = 0;
- const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpLatency, PhysReg);
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
- computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
}
- if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
@@ -561,7 +562,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
for (;DefIdx < NodeNumDefs; ++DefIdx) {
if (!Node->hasAnyUseOfValue(DefIdx))
continue;
- ValueType = Node->getValueType(DefIdx);
+ ValueType = Node->getSimpleValueType(DefIdx);
++DefIdx;
return; // Found a normal regdef.
}
@@ -643,7 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
@@ -663,7 +664,7 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
#endif
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
@@ -831,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N;
- N = N->getGluedNode())
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 84e41fc..76067a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -114,7 +114,8 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
virtual void dumpNode(const SUnit *SU) const;
@@ -134,13 +135,13 @@ namespace llvm {
const SDNode *Node;
unsigned DefIdx;
unsigned NodeNumDefs;
- EVT ValueType;
+ MVT ValueType;
public:
RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
bool IsValid() const { return Node != NULL; }
- EVT GetValue() const {
+ MVT GetValue() const {
assert(IsValid() && "bad iterator");
return ValueType;
}
@@ -158,6 +159,12 @@ namespace llvm {
void InitNodeNumDefs();
};
+ protected:
+    /// forceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index c851291..58aa1fe 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
llvm_unreachable(0);
}
#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d85d41b..6c29c67 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12,42 +12,43 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -91,11 +92,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
const APFloat& Val) {
assert(VT.isFloatingPoint() && "Can only convert between FP types");
- // PPC long double cannot be converted to any other type.
- if (VT == MVT::ppcf128 ||
- &Val.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
-
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
@@ -136,13 +132,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// constants are.
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- if (isa<ConstantSDNode>(NotZero)) {
- if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
- EltSize)
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
- } else if (isa<ConstantFPSDNode>(NotZero)) {
- if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
- .bitcastToAPInt().countTrailingOnes() < EltSize)
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
@@ -179,11 +173,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Do not accept build_vectors that aren't all constants or which have non-0
// elements.
SDValue Zero = N->getOperand(i);
- if (isa<ConstantSDNode>(Zero)) {
- if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (!CN->isNullValue())
return false;
- } else if (isa<ConstantFPSDNode>(Zero)) {
- if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (!CFPN->getValueAPF().isPosZero())
return false;
} else
return false;
@@ -494,8 +488,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
}
case ISD::TargetBlockAddress:
case ISD::BlockAddress: {
- ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
- ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
break;
}
} // end switch (N->getOpcode())
@@ -883,21 +879,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return TLI.getTargetData()->getABITypeAlignment(Ty);
+ return TLI.getDataLayout()->getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),
+ getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
MF = &mf;
+ TTI = tti;
Context = &mf.getFunction()->getContext();
}
@@ -1079,7 +1077,8 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
+ EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
@@ -1173,7 +1172,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1200,7 +1199,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1470,6 +1469,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1477,12 +1477,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddPointer(BA);
+ ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
+ TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1541,7 +1543,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
@@ -1554,7 +1556,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
VT2.getStoreSizeInBits())/8;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
- const TargetData *TD = TLI.getTargetData();
+ const DataLayout *TD = TLI.getDataLayout();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1609,10 +1611,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
}
if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
- // No compile time operations on this type yet.
- if (N1C->getValueType(0) == MVT::ppcf128)
- return SDValue();
-
APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
switch (Cond) {
default: break;
@@ -2444,8 +2442,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- // No compile time operations on ppcf128.
- if (VT == MVT::ppcf128) break;
APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
@@ -2454,9 +2450,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(Val.bitsToFloat(), VT);
+ return getConstantFP(APFloat(Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(Val.bitsToDouble(), VT);
+ return getConstantFP(APFloat(Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2474,61 +2470,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
// Constant fold unary operations with a floating point constant operand.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
APFloat V = C->getValueAPF(); // make copy
- if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
- switch (Opcode) {
- case ISD::FNEG:
- V.changeSign();
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- case ISD::FABS:
- V.clearSign();
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- case ISD::FCEIL: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FTRUNC: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FFLOOR: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FP_EXTEND: {
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- }
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- integerPart x[2];
- bool ignored;
- assert(integerPartWidth >= 64);
- // FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
- Opcode==ISD::FP_TO_SINT,
- APFloat::rmTowardZero, &ignored);
- if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
- break;
- APInt api(VT.getSizeInBits(), x);
- return getConstant(api, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
- else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- }
+ APInt api(VT.getSizeInBits(), x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
}
}
@@ -3049,7 +3043,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
    // Canonicalize constant to RHS if commutative
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
- } else if (N2CFP && VT != MVT::ppcf128) {
+ } else if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
@@ -3383,7 +3377,7 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
unsigned NumVTBytes = VT.getSizeInBits() / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
- uint64_t Val = 0;
+ APInt Val(NumBytes*8, 0);
if (TLI.isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Str[i] << i*8;
@@ -3392,7 +3386,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
- return DAG.getConstant(Val, VT);
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
}
/// getMemBasePlusOffset - Returns base and offset node for the
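
getMemsetStringVal now bails out (returns a null SDValue) unless the packed immediate is cheap to materialize, which the caller treats as "keep the load". A sketch of the byte packing plus the cost gate, with immCost standing in for the TargetTransformInfo::getIntImmCost query (hypothetical signature, not part of this patch):

#include <cstdint>
#include <string>

// Sketch: pack the leading bytes little-endian (assumes NumBytes <= 8) and
// fold to an immediate only when materializing it costs 0 or 1.
static bool foldBytesToImm(const std::string &Str, unsigned NumBytes,
                           unsigned (*immCost)(uint64_t), uint64_t &Imm) {
  Imm = 0;
  for (unsigned i = 0; i != NumBytes; ++i)
    Imm |= (uint64_t)(unsigned char)Str[i] << (i * 8);
  return immCost(Imm) < 2;  // the same "< 2" threshold used above
}
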
@@ -3430,8 +3429,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset,
+ bool ZeroMemset,
bool MemcpyStrSrc,
+ bool AllowOverlap,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3444,11 +3445,11 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsZeroVal, MemcpyStrSrc,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
TLI.allowsUnalignedMemoryAccesses(VT)) {
VT = TLI.getPointerTy();
} else {
@@ -3474,21 +3475,51 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
    // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
- VT = MVT::i64;
- while (!TLI.isTypeLegal(VT))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize = VT.getSizeInBits() / 8;
- } else {
- // This can result in a type that is not legal on the target, e.g.
- // 1 or 2 bytes on PPC.
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize >>= 1;
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ // FIXME: Only does this for 64-bit or more since we don't have proper
+ // cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
+
MemOps.push_back(VT);
Size -= VTSize;
}
@@ -3516,7 +3547,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize =
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3531,12 +3564,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
- true, CopyFromStr, DAG, TLI))
+ false, false, CopyFromStr, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3553,6 +3586,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
if (CopyFromStr &&
(isZeroStr || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
@@ -3561,11 +3602,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
- Store = DAG.getStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstPtrInfo.getWithOffset(DstOff), isVol,
- false, Align);
- } else {
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ }
+
+ if (!Store.getNode()) {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
@@ -3585,6 +3629,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3609,7 +3654,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3619,13 +3665,13 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align),
- SrcAlign, true, false, DAG, TLI))
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3687,7 +3733,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3695,12 +3742,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- IsZeroVal, false, DAG, TLI))
+ true, IsZeroVal, false, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3722,6 +3769,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
// If this store is smaller than the largest store, see whether we can get
// the smaller value for free with a truncate.
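
The same overlap trick applies on the memset path; a sketch (mine, not LLVM
code) of what the DstOff adjustment above produces when zeroing 15 bytes with
64-bit stores:

    #include <cstdint>
    #include <cstring>

    // Stores cover [0,8) and [7,15); the second offset is
    // 8 - (VTSize - Size) = 7, exactly the DstOff adjustment above.
    static void zero15(uint8_t *dst) {
      uint64_t z = 0;
      std::memcpy(dst, &z, 8);
      std::memcpy(dst + 7, &z, 8);
    }
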
@@ -3740,6 +3794,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isVol, false, Align);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3794,7 +3849,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3849,7 +3904,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3898,7 +3953,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
return Result;
// Emit a library call.
- Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -6094,7 +6149,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI.getTargetData());
+ TLI.getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
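
The alignment inference above reduces to a small calculation; a standalone
sketch (an assumed simplification of the DAG code, with my own names):

    #include <cstdint>

    // If the low bits of a pointer are known zero (KnownZero has its low
    // AlignBits bits set), the pointer is aligned to 1 << AlignBits.
    static unsigned inferAlign(uint64_t knownZero) {
      unsigned alignBits = 0;                         // countTrailingOnes
      while (alignBits < 31 && (knownZero & (1ull << alignBits)))
        ++alignBits;
      return alignBits ? 1u << alignBits : 0;
    }
    // inferAlign(0xF) == 16: four known-zero low bits imply 16-byte alignment.
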
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 483b051..8c22db3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,51 +12,51 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
-#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT);
+ MVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -98,10 +98,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
- unsigned NumParts, EVT PartVT, EVT ValueVT,
+ unsigned NumParts, MVT PartVT, EVT ValueVT,
+ const Value *V,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -125,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
- PartVT, HalfVT);
+ PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
- RoundParts / 2, PartVT, HalfVT);
+ RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -143,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT);
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
// Combine the round and odd parts.
Lo = Val;
@@ -159,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
- assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
@@ -172,30 +174,30 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
}
}
// There is now one part, held in Val. Correct it to match ValueVT.
- PartVT = Val.getValueType();
+ EVT PartEVT = Val.getValueType();
- if (PartVT == ValueVT)
+ if (PartEVT == ValueVT)
return Val;
- if (PartVT.isInteger() && ValueVT.isInteger()) {
- if (ValueVT.bitsLT(PartVT)) {
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, DL, PartVT, Val,
+ Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
- if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
@@ -204,20 +206,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
}
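
In the simplest scalar case the routine above boils down to stitching
register-sized pieces back into one wide value. A plain C++ sketch (mine, not
the DAG code) for an i64 assembled from two i32 parts, with Parts[0] as the
low half as in the Lo/Hi code above:

    #include <cstdint>

    // Morally the Lo/Hi + BUILD_PAIR path.
    static uint64_t copyFromParts(uint32_t lo, uint32_t hi) {
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }
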
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent. If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger than ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT) {
+ MVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -225,7 +227,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Handle a multi-element vector.
if (NumParts > 1) {
- EVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT;
+ MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -233,7 +236,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
- assert(RegisterVT == Parts[0].getValueType() &&
+ assert(RegisterVT == Parts[0].getSimpleValueType() &&
"Part type doesn't match part!");
// Assemble the parts into intermediate operands.
@@ -243,7 +246,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
@@ -252,7 +255,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -263,31 +266,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
// There is now one part, held in Val. Correct it to match ValueVT.
- PartVT = Val.getValueType();
+ EVT PartEVT = Val.getValueType();
- if (PartVT == ValueVT)
+ if (PartEVT == ValueVT)
return Val;
- if (PartVT.isVector()) {
+ if (PartEVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
}
// Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartVT);
+ bool Smaller = ValueVT.bitsLE(PartEVT);
return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, ValueVT, Val);
@@ -295,17 +298,28 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle cases such as i8 -> <1 x i1>
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartVT) {
- bool Smaller = ValueVT.bitsLE(PartVT);
+ ValueVT.getVectorElementType() != PartEVT) {
+ bool Smaller = ValueVT.bitsLE(PartEVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, ValueVT.getScalarType(), Val);
}
@@ -313,25 +327,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
-
-
-
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT);
+ MVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT,
+ MVT PartVT, const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
@@ -342,7 +353,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
- if (PartVT == ValueVT) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
@@ -364,7 +376,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
- assert(NumParts == 1 && PartVT != ValueVT);
+ assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
@@ -383,7 +395,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
+ if (PartEVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
Parts[0] = Val;
return;
}
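
The copy-to-parts direction is the mirror image; a minimal sketch (an assumed
simplification, not the DAG code) of splitting an i64 into two legal i32
parts, where the SRL at RoundBits above is the general form of the shift:

    #include <cstdint>

    static void copyToParts(uint64_t val, uint32_t parts[2]) {
      parts[0] = static_cast<uint32_t>(val);        // low part
      parts[1] = static_cast<uint32_t>(val >> 32);  // high part
    }
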
@@ -398,7 +422,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
if (TLI.isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -444,20 +468,21 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT) {
+ MVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (NumParts == 1) {
- if (PartVT == ValueVT) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
- PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
@@ -477,12 +502,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType().bitsGE(
+ PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
- PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartVT.bitsLE(ValueVT);
+ bool Smaller = PartEVT.bitsLE(ValueVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, PartVT, Val);
} else {
@@ -502,7 +527,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
// Handle a multi-element vector.
- EVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT;
+ MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
IntermediateVT,
@@ -530,7 +556,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split the value into
// legal parts.
@@ -538,13 +564,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
}
}
-
-
-
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -570,7 +593,7 @@ namespace {
/// getRegisterType member function; however, with physical registers
/// it is necessary to have a separate record of the types.
///
- SmallVector<EVT, 4> RegVTs;
+ SmallVector<MVT, 4> RegVTs;
/// Regs - This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
@@ -581,7 +604,7 @@ namespace {
RegsForValue() {}
RegsForValue(const SmallVector<unsigned, 4> &regs,
- EVT regvt, EVT valuevt)
+ MVT regvt, EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
@@ -591,7 +614,7 @@ namespace {
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
- EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
@@ -602,7 +625,7 @@ namespace {
/// areValueTypesLegal - Return true if types of all the values are legal.
bool areValueTypesLegal(const TargetLowering &TLI) {
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
if (!TLI.isTypeLegal(RegisterVT))
return false;
}
@@ -622,14 +645,15 @@ namespace {
/// If the Flag pointer is NULL, no flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -648,7 +672,8 @@ namespace {
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -662,7 +687,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -722,7 +747,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
+ NumRegs, RegisterVT, ValueVT, V);
Part += NumRegs;
Parts.clear();
}
@@ -737,7 +762,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Get the list of the value's legal parts.
@@ -746,10 +772,12 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
+ ISD::NodeType ExtendKind =
+ TLI.isZExtFree(Val, RegisterVT) ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
+ &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
Part += NumParts;
}
@@ -812,7 +840,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
@@ -825,7 +853,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- TD = DAG.getTarget().getTargetData();
+ TD = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -945,7 +973,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
-#include "llvm/Instruction.def"
+#include "llvm/IR/Instruction.def"
}
// Assign the ordering to the freshly created DAG nodes.
@@ -994,7 +1022,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1149,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
}
llvm_unreachable("Can't get register for value!");
@@ -1205,24 +1233,27 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
const Function *F = I.getParent()->getParent();
- if (F->paramHasAttr(0, Attribute::SExt))
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->paramHasAttr(0, Attribute::ZExt))
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->paramHasAttr(0, Attribute::InReg))
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
@@ -1233,7 +1264,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
- /*isfixed=*/true));
+ /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
@@ -1736,8 +1767,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
}
- B.RegVT = VT;
- B.Reg = FuncInfo.CreateReg(VT);
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
B.Reg, Sub);
@@ -1771,7 +1802,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- EVT VT = BB.RegVT;
+ MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
Reg, VT);
SDValue Cmp;
@@ -2093,7 +2124,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(4))
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2565,9 +2596,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
- // If the switch has more than 5 blocks, and at least 40% dense, and the
+ // If the switch has more than N blocks, and is at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
+ // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries().
if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
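
A sketch of the heuristic as described (the names and the exact integer form
of the density test are my assumptions based on the comments; the real logic
lives in handleJTSwitchCase):

    #include <cstdint>

    static bool shouldEmitJumpTable(uint64_t numCases, uint64_t range,
                                    uint64_t minEntries /* default 4 */) {
      if (numCases < minEntries)
        return false;
      // At least 40% dense; ignores overflow for very large ranges.
      return numCases * 10 >= range * 4;
    }
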
@@ -2581,14 +2613,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
- SmallVector<BasicBlock*, 32> succs;
- succs.reserve(I.getNumSuccessors());
- for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
- succs.push_back(I.getSuccessor(i));
- array_pod_sort(succs.begin(), succs.end());
- succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB);
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithWeight(IndirectBrMBB, Succ);
}
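
The replacement dedup idiom in portable C++ (SmallSet is LLVM's type;
std::set stands in for it here): each successor is visited once, in
first-appearance order, without the sort + unique pass the old code needed.

    #include <set>
    #include <vector>

    static std::vector<int> uniqueSuccessors(const std::vector<int> &succs) {
      std::set<int> done;
      std::vector<int> out;
      for (int s : succs)
        if (done.insert(s).second)   // the 'Inserted' flag in the patch
          out.push_back(s);
      return out;
    }
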
@@ -3114,12 +3146,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (StructType *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
- DAG.getIntPtrConstant(Offset));
+ DAG.getConstant(Offset, N.getValueType()));
}
Ty = StTy->getElementType(Field);
@@ -3164,7 +3196,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N.getValueType(), IdxN,
DAG.getConstant(Amt, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
N.getValueType(), IdxN, Scale);
}
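
The address math being lowered here, restated in plain C++ (a sketch; the
patch's change is that the index is now scaled in its own integer type rather
than the pointer type):

    #include <cstdint>

    // base + constant field offset + index * element size.
    static uint64_t gepAddr(uint64_t base, uint64_t fieldOffset,
                            int64_t idx, uint64_t elemSize) {
      return base + fieldOffset + static_cast<uint64_t>(idx) * elemSize;
    }
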
@@ -3185,9 +3217,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
return; // getValue will auto-populate this.
Type *Ty = I.getAllocatedType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
I.getAlignment());
SDValue AllocSize = getValue(I.getArraySize());
@@ -3664,16 +3696,12 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3692,6 +3720,7 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFracPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3705,16 +3734,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
-
- // Add the exponent into the result in integer domain.
- SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -3731,16 +3753,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
-
- // Add the exponent into the result in integer domain.
- SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -3769,37 +3784,27 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
- MVT::i32, t13);
-
- // Add the exponent into the result in integer domain.
- SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
}
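
The scheme behind the limited-precision expansion, as a runnable sketch
(mine, not LLVM's code; std::exp2 stands in for the minimax polynomial,
sizeof(float) == 4 is assumed, and the bit arithmetic mirrors the SHL-by-23
above; valid for results in the normal float range):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // exp(x) = 2^(x*log2(e)) = 2^i * 2^f; the integer part i is added
    // directly into the IEEE-754 exponent field (bit 23 and up).
    static float expSketch(float x) {
      float t = x * 1.4426950f;                // x * log2(e)
      int32_t i = static_cast<int32_t>(t);     // IntegerPartOfX (truncated)
      float f = t - static_cast<float>(i);     // FractionalPartOfX
      float twoToF = std::exp2(f);             // polynomial in the real code
      int32_t bits;
      std::memcpy(&bits, &twoToF, 4);
      bits += i << 23;                         // add exponent in int domain
      std::memcpy(&twoToF, &bits, 4);
      return twoToF;
    }
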
-/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
@@ -3811,6 +3816,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3824,12 +3830,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f949a29));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
@@ -3850,12 +3853,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3fdef31a));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
@@ -3884,32 +3884,23 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4006dcab));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}
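
The mirror-image sketch for the log family (mine; valid for normal, positive
x). GetExponent and GetSignificand peel the IEEE-754 fields apart; expandLog2
and expandLog10 below differ only in the scale applied to the exponent
(1.0 and log10(2), about 0.30102999f).

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static float logSketch(float x) {
      int32_t bits;
      std::memcpy(&bits, &x, 4);
      float e = static_cast<float>(((bits >> 23) & 0xff) - 127);
      float logOfExponent = e * 0.69314718f;             // e * ln(2)
      int32_t mbits = (bits & 0x007fffff) | 0x3f800000;  // mantissa in [1,2)
      float m;
      std::memcpy(&m, &mbits, 4);
      return logOfExponent + std::log(m);    // polynomial in the real code
    }
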
-/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
@@ -3921,6 +3912,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3932,12 +3924,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fd6633d));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
@@ -3958,12 +3947,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x4020d29c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
@@ -3993,32 +3979,23 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4042902c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}
-/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog10(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
@@ -4030,6 +4007,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4043,12 +4021,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f011300));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
@@ -4065,12 +4040,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f25f7c3));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
@@ -4095,33 +4067,23 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3f57ce70));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG10, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
}
-/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
-
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
// FractionalPartOfX = x - (float)IntegerPartOfX;
@@ -4132,6 +4094,7 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4145,15 +4108,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4170,15 +4127,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4206,54 +4157,42 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
}
-/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
-void
-SelectionDAGBuilder::visitPow(const CallInst &I) {
- SDValue result;
- const Value *Val = I.getArgOperand(0);
- DebugLoc dl = getCurDebugLoc();
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
-
- if (getValue(Val).getValueType() == MVT::f32 &&
- getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- APFloat Ten(10.0f);
- IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
- }
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
}
}
- if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(1));
-
+ if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// IntegerPartOfX = (int32_t)(x * LOG2OF10);
- SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78));
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -4265,6 +4204,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4278,15 +4218,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4303,15 +4237,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4339,24 +4267,18 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FPOW, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)));
+
+  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                            TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}
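expandPow only special-cases pow(10.0f, x), rewriting it as 2^(x * log2(10)) and reusing the exp2-style machinery above; the magic constant 0x40549a78 fed to getF32Constant is log2(10) encoded as an IEEE-754 single. A quick standalone check of that decoding (illustrative only, not part of the patch):

  #include <cmath>
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    uint32_t Bits = 0x40549a78; // the LOG2OF10 constant used above
    float F;
    std::memcpy(&F, &Bits, sizeof(F));
    std::printf("%.6f vs %.6f\n", F, std::log2(10.0)); // both print 3.321928
    return 0;
  }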
@@ -4377,7 +4299,8 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) {
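For reference, ExpandPowI lowers llvm.powi by square-and-multiply: Log2_32(Val) squarings plus one extra multiply per set bit of Val, which is exactly the CountPopulation_32(Val) + Log2_32(Val) total that the size check above caps at 7. A plain-C++ sketch of the scheme for a positive exponent (the DAG version additionally handles negative exponents with a final reciprocal FDIV):

  // Square-and-multiply: one squaring per bit position of N, one product
  // per set bit, mirroring the multiply-count estimate above.
  static double powiSketch(double X, unsigned N) {
    double Result = 1.0;
    double CurSquare = X;
    while (N) {
      if (N & 1)
        Result *= CurSquare;
      CurSquare *= CurSquare;
      N >>= 1;
    }
    return Result;
  }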
@@ -4850,7 +4773,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
// to be zero.
// We must do this early because v2i32 is not a legal type.
- DebugLoc dl = getCurDebugLoc();
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
@@ -4867,7 +4789,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vinsertf128_ps_256:
case Intrinsic::x86_avx_vinsertf128_si_256:
case Intrinsic::x86_avx2_vinserti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
@@ -4883,7 +4804,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vextractf128_ps_256:
case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
@@ -4917,7 +4837,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
EVT DestVT = TLI.getValueType(I.getType());
const Value *Op1 = I.getArgOperand(0);
- Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
getValue(I.getArgOperand(1)),
@@ -4926,53 +4846,57 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
- case Intrinsic::sqrt:
- setValue(&I, DAG.getNode(ISD::FSQRT, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::powi:
setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return 0;
- case Intrinsic::sin:
- setValue(&I, DAG.getNode(ISD::FSIN, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
- case Intrinsic::cos:
- setValue(&I, DAG.getNode(ISD::FCOS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::log:
- visitLog(I);
+ setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log2:
- visitLog2(I);
+ setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log10:
- visitLog10(I);
+ setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp:
- visitExp(I);
+ setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp2:
- visitExp2(I);
+ setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::pow:
- visitPow(I);
+ setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
return 0;
+ case Intrinsic::sqrt:
case Intrinsic::fabs:
- setValue(&I, DAG.getNode(ISD::FABS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
+ case Intrinsic::sin:
+ case Intrinsic::cos:
case Intrinsic::floor:
- setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, dl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ }
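The rewritten case above folds nine unary floating-point intrinsics into one table-driven dispatch; each llvm.* call maps one-to-one onto its ISD node and is otherwise lowered identically. For example (IR shown for illustration):

  //   %r = call float @llvm.ceil.f32(float %x)   ->  (FCEIL %x)
  //   %r = call float @llvm.sqrt.f32(float %x)   ->  (FSQRT %x)
  //   %r = call float @llvm.rint.f32(float %x)   ->  (FRINT %x)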
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4983,7 +4907,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isOperationLegal(ISD::FMA, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT) &&
TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5080,7 +5004,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ Res = DAG.getStore(getRoot(), dl, Src, FIN,
MachinePointerInfo::getFixedStack(FI),
true, false, 0);
setValue(&I, Res);
@@ -5152,10 +5076,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
+ case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, dl, MVT::Other, getRoot()));
return 0;
}
TargetLowering::ArgListTy Args;
@@ -5165,15 +5092,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
/*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
- Args, DAG, getCurDebugLoc());
+ Args, DAG, dl);
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return 0;
}
- case Intrinsic::debugtrap: {
- DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
- return 0;
- }
+
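With this merge, llvm.trap and llvm.debugtrap share a single lowering path: when no trap function is configured they become ISD::TRAP and ISD::DEBUGTRAP respectively, and DEBUGTRAP now defaults to Expand (see the TargetLowering change later in this patch), so targets without a dedicated debug-trap instruction fall back to a plain trap. A sketch of the merged behaviour, assuming TrapFuncName is empty:

  //   llvm.trap      -> ISD::TRAP
  //   llvm.debugtrap -> ISD::DEBUGTRAP  (legalized to TRAP on targets that
  //                                      keep the new Expand default)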
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
@@ -5194,7 +5118,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2));
return 0;
}
case Intrinsic::prefetch: {
@@ -5273,8 +5197,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
DAG.getMachineFunction(),
@@ -5285,9 +5208,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
- unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
@@ -5357,11 +5280,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
!isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
isTailCall = false;
- // If there's a possibility that fast-isel has already selected some amount
- // of the current basic block, don't emit a tail call.
- if (isTailCall && TM.Options.EnableFastISel)
- isTailCall = false;
-
TargetLowering::
CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
getCurDebugLoc(), CS);
@@ -5752,7 +5670,7 @@ public:
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context,
const TargetLowering &TLI,
- const TargetData *TD) const {
+ const DataLayout *TD) const {
if (CallOperandVal == 0) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
@@ -5833,7 +5751,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
- EVT RegVT = *PhysReg.second->vt_begin();
+ MVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
@@ -5843,8 +5761,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// bitcast to the corresponding integer type. This turns an f64 value
// into i64, which can be passed with two i32 values on a 32-bit
// machine.
- RegVT = EVT::getIntegerVT(Context,
- OpInfo.ConstraintVT.getSizeInBits());
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
@@ -5854,7 +5771,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
- EVT RegVT;
+ MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
// If this is a constraint for a specific physical register, like {r17},
@@ -5928,7 +5845,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- EVT OpVT = MVT::Other;
+ MVT OpVT = MVT::Other;
// Compute the value type for each operand.
switch (OpInfo.Type) {
@@ -5943,10 +5860,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getValueType(CS.getType());
+ OpVT = TLI.getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -5967,7 +5884,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+      OpVT =
+        OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD).getSimpleVT();
}
OpInfo.ConstraintVT = OpVT;
@@ -6056,8 +5974,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
Type *Ty = OpVal->getType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
- unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
@@ -6105,7 +6023,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3.
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
unsigned ExtraInfo = 0;
if (IA->hasSideEffects())
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
@@ -6113,6 +6032,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
// Set the asm dialect.
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an 'other' constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set MayLoad and
+    // MayStore for 'other' constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ ExtraInfo |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ ExtraInfo |= InlineAsm::Extra_MayStore;
+ }
+ }
+
AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
TLI.getPointerTy()));
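The net effect of the new loop is that memory-constrained operands, and conservatively 'other'-constrained ones, now mark the INLINEASM node as reading or writing memory. A hypothetical GCC-style example of operands that would set these bits (illustrative, not from the patch):

  int X;
  void demo() {
    asm volatile("movl $1, %0" : "=m"(X));          // "=m" output -> MayStore
    int Y;
    asm volatile("movl %1, %0" : "=r"(Y) : "m"(X)); // "m" input   -> MayLoad
  }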
@@ -6212,7 +6152,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue MatchedRegs;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
- EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
MatchedRegs.RegVTs.push_back(RegVT);
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
@@ -6222,7 +6162,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(),
DAG, AsmNodeOperands);
@@ -6304,7 +6244,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
DAG, AsmNodeOperands);
@@ -6335,7 +6275,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6375,7 +6315,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, IA);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -6405,7 +6345,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
@@ -6451,7 +6391,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
- getTargetData()->getABITypeAlignment(ArgTy);
+ getDataLayout()->getABITypeAlignment(ArgTy);
if (Args[i].isZExt)
Flags.setZExt();
@@ -6465,7 +6405,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setByVal();
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
// For ByVal, alignment should come from FE. BE will guess if this
// info is not there but there are cases it cannot get right.
unsigned FrameAlign;
@@ -6479,7 +6419,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -6490,12 +6430,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
- PartVT, ExtendKind);
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
        // If it isn't the first piece, the alignment must be 1.
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
- i < CLI.NumFixedArgs);
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
@@ -6513,11 +6454,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ComputeValueVTs(*this, CLI.RetTy, RetTys);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
@@ -6567,11 +6508,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT,
+ NumRegs, RegisterVT, VT, NULL,
AssertOp));
CurReg += NumRegs;
}
@@ -6610,7 +6551,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
PendingExports.push_back(Chain);
}
@@ -6640,13 +6581,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
const Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDB->DAG;
DebugLoc dl = SDB->getCurDebugLoc();
- const TargetData *TD = TLI.getTargetData();
+ const DataLayout *TD = TLI.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
@@ -6657,8 +6597,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// or one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
- EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, true);
+ MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
Ins.push_back(RetArg);
}
@@ -6677,15 +6617,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
unsigned OriginalAlignment =
TD->getABITypeAlignment(ArgTy);
- if (F.paramHasAttr(Idx, Attribute::ZExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
- if (F.paramHasAttr(Idx, Attribute::SExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.paramHasAttr(Idx, Attribute::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
Flags.setInReg();
- if (F.paramHasAttr(Idx, Attribute::StructRet))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
Flags.setByVal();
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -6699,14 +6639,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
FrameAlign = TLI.getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
}
- if (F.paramHasAttr(Idx, Attribute::Nest))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+ Idx-1, i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
        // If it isn't the first piece, the alignment must be 1.
@@ -6748,11 +6689,11 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
- EVT VT = ValueVTs[0];
- EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, AssertOp);
+ RegVT, VT, NULL, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6781,19 +6722,19 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
if (!I->use_empty()) {
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (F.paramHasAttr(Idx, Attribute::SExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
- else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
- AssertOp));
+ NULL, AssertOp));
}
i += NumParts;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 3b7615a..9188945 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,12 +14,12 @@
#ifndef SELECTIONDAGBUILDER_H
#define SELECTIONDAGBUILDER_H
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
@@ -66,7 +66,7 @@ class ShuffleVectorInst;
class SIToFPInst;
class StoreInst;
class SwitchInst;
-class TargetData;
+class DataLayout;
class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
@@ -262,7 +262,7 @@ private:
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, EVT RgVT, bool E,
+ unsigned Rg, MVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
@@ -271,7 +271,7 @@ private:
APInt Range;
const Value *SValue;
unsigned Reg;
- EVT RegVT;
+ MVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
@@ -285,7 +285,7 @@ public:
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- const TargetData *TD;
+ const DataLayout *TD;
AliasAnalysis *AA;
const TargetLibraryInfo *LibInfo;
@@ -533,13 +533,6 @@ private:
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
- void visitPow(const CallInst &I);
- void visitExp2(const CallInst &I);
- void visitExp(const CallInst &I);
- void visitLog(const CallInst &I);
- void visitLog2(const CallInst &I);
- void visitLog10(const CallInst &I);
-
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
void visitVAEnd(const CallInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 75989ad..5701b13 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -11,23 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
std::string SDNode::getOperationName(const SelectionDAG *G) const {
@@ -475,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "<" << *M->getMemOperand() << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
OS << "<";
WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
OS << ", ";
WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+    else if (offset < 0)
+      OS << " " << offset;
if (unsigned int TF = BA->getTargetFlags())
OS << " [TF=" << TF << ']';
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 7542941..d4e9a50 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -12,23 +12,18 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,22 +32,29 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
#include <algorithm>
using namespace llvm;
@@ -216,8 +218,9 @@ namespace llvm {
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
- if (OptLevel == CodeGenOpt::None ||
+ if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
@@ -348,13 +351,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
LibInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF);
+ CurDAG->init(*MF, TTI);
FuncInfo->set(Fn, *MF);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -474,6 +478,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -999,12 +1008,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (AllPredsVisited) {
for (BasicBlock::const_iterator I = LLVMBB->begin();
- isa<PHINode>(I); ++I)
- FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(PN);
} else {
for (BasicBlock::const_iterator I = LLVMBB->begin();
- isa<PHINode>(I); ++I)
- FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -1106,19 +1115,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
SelectBasicBlock(Inst, BI, HadTailCall);
- // Recompute NumFastIselRemaining as Selection DAG instruction
- // selection may have handled the call, input args, etc.
- unsigned RemainingNow = std::distance(Begin, BI);
- NumFastIselFailures += NumFastIselRemaining - RemainingNow;
-
// If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
--BI;
break;
}
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
NumFastIselRemaining = RemainingNow;
continue;
}
@@ -1184,14 +1195,12 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->JTCases.empty() &&
SDB->BitTestCases.empty()) {
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
- PHI->addOperand(
- MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
return;
}
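The PHI updates here and in the following hunks all switch to the same idiom: wrap the existing PHI MachineInstr in a MachineInstrBuilder and append each incoming (register, predecessor-block) pair fluently, instead of constructing MachineOperands by hand. A minimal sketch of the pattern (names illustrative):

  MachineInstrBuilder PHI(*MF, ExistingPHIInstr); // wraps, does not create
  PHI.addReg(IncomingReg).addMBB(PredMBB);        // one incoming pair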
@@ -1243,33 +1252,23 @@ SelectionDAGISel::FinishBasicBlock() {
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
pi != pe; ++pi) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB.
- if (PHIBB == SDB->BitTestCases[i].Default) {
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
- back().ThisBB));
- }
+ if (PHIBB == SDB->BitTestCases[i].Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Parent)
+ .addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB);
// One of "cases" BB.
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
j != ej; ++j) {
MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
- if (cBB->isSuccessor(PHIBB)) {
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(cBB));
- }
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
}
}
}
@@ -1304,25 +1303,17 @@ SelectionDAGISel::FinishBasicBlock() {
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
pi != pe; ++pi) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
- if (PHIBB == SDB->JTCases[i].second.Default) {
- PHI->addOperand
- (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand
- (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
- }
+ if (PHIBB == SDB->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->JTCases[i].first.HeaderBB);
// JT BB. Just iterate over successors here
- if (FuncInfo->MBB->isSuccessor(PHIBB)) {
- PHI->addOperand
- (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
- }
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
}
}
SDB->JTCases.clear();
@@ -1330,14 +1321,11 @@ SelectionDAGISel::FinishBasicBlock() {
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
- PHI->addOperand(
- MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
- }
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// If we generated any switch lowering information, build and codegen any
@@ -1373,18 +1361,16 @@ SelectionDAGISel::FinishBasicBlock() {
// FuncInfo->MBB may have been removed from the CFG if a branch was
// constant folded.
if (ThisBB->isSuccessor(FuncInfo->MBB)) {
- for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
- Phi != FuncInfo->MBB->end() && Phi->isPHI();
- ++Phi) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
assert(pn != FuncInfo->PHINodesToUpdate.size() &&
"Didn't find PHI entry!");
- if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
- Phi->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
- false));
- Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
break;
}
}
@@ -2004,7 +1990,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
return Res;
}
-/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+/// CheckSame - Implements OP_CheckSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3921635..b752b48 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -11,21 +11,21 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dcaa9ba..34f3bc9 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,24 +12,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLowering.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <cctype>
using namespace llvm;
@@ -94,98 +94,124 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::ADD_F32] = "__addsf3";
Names[RTLIB::ADD_F64] = "__adddf3";
Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
Names[RTLIB::SUB_F32] = "__subsf3";
Names[RTLIB::SUB_F64] = "__subdf3";
Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
Names[RTLIB::MUL_F32] = "__mulsf3";
Names[RTLIB::MUL_F64] = "__muldf3";
Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
Names[RTLIB::DIV_F32] = "__divsf3";
Names[RTLIB::DIV_F64] = "__divdf3";
Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
Names[RTLIB::REM_F32] = "fmodf";
Names[RTLIB::REM_F64] = "fmod";
Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
Names[RTLIB::REM_PPCF128] = "fmodl";
Names[RTLIB::FMA_F32] = "fmaf";
Names[RTLIB::FMA_F64] = "fma";
Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
Names[RTLIB::FMA_PPCF128] = "fmal";
Names[RTLIB::POWI_F32] = "__powisf2";
Names[RTLIB::POWI_F64] = "__powidf2";
Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
Names[RTLIB::POWI_PPCF128] = "__powitf2";
Names[RTLIB::SQRT_F32] = "sqrtf";
Names[RTLIB::SQRT_F64] = "sqrt";
Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
Names[RTLIB::SQRT_PPCF128] = "sqrtl";
Names[RTLIB::LOG_F32] = "logf";
Names[RTLIB::LOG_F64] = "log";
Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
Names[RTLIB::LOG_PPCF128] = "logl";
Names[RTLIB::LOG2_F32] = "log2f";
Names[RTLIB::LOG2_F64] = "log2";
Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
Names[RTLIB::LOG2_PPCF128] = "log2l";
Names[RTLIB::LOG10_F32] = "log10f";
Names[RTLIB::LOG10_F64] = "log10";
Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
Names[RTLIB::LOG10_PPCF128] = "log10l";
Names[RTLIB::EXP_F32] = "expf";
Names[RTLIB::EXP_F64] = "exp";
Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
Names[RTLIB::EXP_PPCF128] = "expl";
Names[RTLIB::EXP2_F32] = "exp2f";
Names[RTLIB::EXP2_F64] = "exp2";
Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
Names[RTLIB::EXP2_PPCF128] = "exp2l";
Names[RTLIB::SIN_F32] = "sinf";
Names[RTLIB::SIN_F64] = "sin";
Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
Names[RTLIB::SIN_PPCF128] = "sinl";
Names[RTLIB::COS_F32] = "cosf";
Names[RTLIB::COS_F64] = "cos";
Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
Names[RTLIB::COS_PPCF128] = "cosl";
Names[RTLIB::POW_F32] = "powf";
Names[RTLIB::POW_F64] = "pow";
Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
Names[RTLIB::POW_PPCF128] = "powl";
Names[RTLIB::CEIL_F32] = "ceilf";
Names[RTLIB::CEIL_F64] = "ceil";
Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
Names[RTLIB::CEIL_PPCF128] = "ceill";
Names[RTLIB::TRUNC_F32] = "truncf";
Names[RTLIB::TRUNC_F64] = "trunc";
Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
Names[RTLIB::TRUNC_PPCF128] = "truncl";
Names[RTLIB::RINT_F32] = "rintf";
Names[RTLIB::RINT_F64] = "rint";
Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
Names[RTLIB::RINT_PPCF128] = "rintl";
Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
Names[RTLIB::NEARBYINT_F64] = "nearbyint";
Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
Names[RTLIB::FLOOR_F32] = "floorf";
Names[RTLIB::FLOOR_F64] = "floor";
Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
Names[RTLIB::FLOOR_PPCF128] = "floorl";
Names[RTLIB::COPYSIGN_F32] = "copysignf";
Names[RTLIB::COPYSIGN_F64] = "copysign";
Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
@@ -200,6 +226,9 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
@@ -216,49 +245,66 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
Names[RTLIB::OEQ_F32] = "__eqsf2";
Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
Names[RTLIB::UNE_F32] = "__nesf2";
Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
Names[RTLIB::OGE_F32] = "__gesf2";
Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
Names[RTLIB::OLT_F32] = "__ltsf2";
Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
Names[RTLIB::OLE_F32] = "__lesf2";
Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
Names[RTLIB::OGT_F32] = "__gtsf2";
Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
Names[RTLIB::UO_F32] = "__unordsf2";
Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
Names[RTLIB::O_F32] = "__unordsf2";
Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
@@ -311,6 +357,11 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
}
return UNKNOWN_LIBCALL;
@@ -324,11 +375,15 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
return FPROUND_F64_F32;
if (OpVT == MVT::f80)
return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
if (OpVT == MVT::ppcf128)
return FPROUND_PPCF128_F32;
} else if (RetVT == MVT::f64) {
if (OpVT == MVT::f80)
return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
if (OpVT == MVT::ppcf128)
return FPROUND_PPCF128_F64;
}
@@ -368,6 +423,13 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
return FPTOSINT_F80_I64;
if (RetVT == MVT::i128)
return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
} else if (OpVT == MVT::ppcf128) {
if (RetVT == MVT::i32)
return FPTOSINT_PPCF128_I32;
@@ -411,6 +473,13 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
return FPTOUINT_F80_I64;
if (RetVT == MVT::i128)
return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
} else if (OpVT == MVT::ppcf128) {
if (RetVT == MVT::i32)
return FPTOUINT_PPCF128_I32;
@@ -428,29 +497,35 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
if (RetVT == MVT::f32)
return SINTTOFP_I32_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return SINTTOFP_I32_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return SINTTOFP_I32_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
return SINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
if (RetVT == MVT::f32)
return SINTTOFP_I64_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return SINTTOFP_I64_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return SINTTOFP_I64_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
return SINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
if (RetVT == MVT::f32)
return SINTTOFP_I128_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return SINTTOFP_I128_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return SINTTOFP_I128_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
return SINTTOFP_I128_PPCF128;
}
return UNKNOWN_LIBCALL;
@@ -462,29 +537,35 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
if (RetVT == MVT::f32)
return UINTTOFP_I32_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return UINTTOFP_I32_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return UINTTOFP_I32_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
return UINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
if (RetVT == MVT::f32)
return UINTTOFP_I64_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return UINTTOFP_I64_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return UINTTOFP_I64_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
return UINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
if (RetVT == MVT::f32)
return UINTTOFP_I128_F32;
- else if (RetVT == MVT::f64)
+ if (RetVT == MVT::f64)
return UINTTOFP_I128_F64;
- else if (RetVT == MVT::f80)
+ if (RetVT == MVT::f80)
return UINTTOFP_I128_F80;
- else if (RetVT == MVT::ppcf128)
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
return UINTTOFP_I128_PPCF128;
}
return UNKNOWN_LIBCALL;
@@ -496,26 +577,34 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
CCs[RTLIB::UNE_F32] = ISD::SETNE;
CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
CCs[RTLIB::OGE_F32] = ISD::SETGE;
CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
CCs[RTLIB::OLT_F32] = ISD::SETLT;
CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
CCs[RTLIB::OLE_F32] = ISD::SETLE;
CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
CCs[RTLIB::OGT_F32] = ISD::SETGT;
CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
CCs[RTLIB::UO_F32] = ISD::SETNE;
CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
CCs[RTLIB::O_F32] = ISD::SETEQ;
CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
}
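The table filled in above records how the integer result of each soft-float comparison libcall maps back to a condition code. A runnable sketch of what that encoding means, assuming the usual libgcc convention that __eqtf2-style routines return 0 when their operands compare equal:

#include <cassert>

// Stand-in for a soft-float comparison routine such as __eqtf2 (returns 0
// iff equal, by libgcc convention). CCs[RTLIB::OEQ_F128] == ISD::SETEQ says
// the legalizer rewrites (a OEQ b) as: setcc(call(a, b), 0, SETEQ).
static int eqtf2_stub(double a, double b) { return (a == b) ? 0 : 1; }

int main() {
  assert(eqtf2_stub(1.0, 1.0) == 0); // equal: the SETEQ-against-0 test passes
  assert(eqtf2_stub(1.0, 2.0) != 0); // unequal: the test fails
}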
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -547,6 +636,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
setOperationAction(ISD::FLOG , MVT::f16, Expand);
@@ -579,12 +669,27 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f128, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+ setOperationAction(ISD::FEXP , MVT::f128, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
+ // On most systems, DEBUGTRAP and TRAP behave the same. "Expand" here
+ // tells the DAG legalizer to replace DEBUGTRAP with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
@@ -613,6 +718,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -624,7 +730,7 @@ TargetLowering::~TargetLowering() {
}
MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize());
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
}
/// canOpTrap - Returns true if the operation can trap for the value type.
@@ -647,7 +753,7 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned &NumIntermediates,
- EVT &RegisterVT,
+ MVT &RegisterVT,
TargetLowering *TLI) {
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
@@ -682,7 +788,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
if (!isPowerOf2_32(NewVTSize))
NewVTSize = NextPowerOf2(NewVTSize);
- EVT DestVT = TLI->getRegisterType(NewVT);
+ MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
@@ -706,9 +812,9 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
-TargetLowering::findRepresentativeClass(EVT VT) const {
+TargetLowering::findRepresentativeClass(MVT VT) const {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
@@ -752,14 +858,13 @@ void TargetLowering::computeRegisterProperties() {
// Every integer value type larger than this largest register takes twice as
// many registers to represent as the previous ValueType.
- for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
- EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
- if (!ExpandedVT.isInteger())
- break;
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
@@ -767,7 +872,7 @@ void TargetLowering::computeRegisterProperties() {
unsigned LegalIntReg = LargestIntReg;
for (unsigned IntReg = LargestIntReg - 1;
IntReg >= (unsigned)MVT::i1; --IntReg) {
- EVT IVT = (MVT::SimpleValueType)IntReg;
+ MVT IVT = (MVT::SimpleValueType)IntReg;
if (isTypeLegal(IVT)) {
LegalIntReg = IntReg;
} else {
@@ -818,14 +923,14 @@ void TargetLowering::computeRegisterProperties() {
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1) {
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
bool IsLegalWiderType = false;
// First try to promote the elements of integer vectors. If no legal
// promotion was found, fallback to the widen-vector method.
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
+ MVT SVT = (MVT::SimpleValueType)nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
@@ -844,7 +949,7 @@ void TargetLowering::computeRegisterProperties() {
// Try to widen the vector.
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
+ MVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts &&
isTypeLegal(SVT)) {
@@ -860,14 +965,14 @@ void TargetLowering::computeRegisterProperties() {
}
MVT IntermediateVT;
- EVT RegisterVT;
+ MVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] =
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
- EVT NVT = VT.getPow2VectorType();
+ MVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
@@ -900,7 +1005,7 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT TargetLowering::getSetCCResultType(EVT VT) const {
assert(!VT.isVector() && "No default SetCC type for vectors!");
- return PointerTy.SimpleTy;
+ return getPointerTy(0).SimpleTy;
}
MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
@@ -919,7 +1024,7 @@ MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
- EVT &RegisterVT) const {
+ MVT &RegisterVT) const {
unsigned NumElts = VT.getVectorNumElements();
// If there is a wider vector type with the same element type as this one,
@@ -929,9 +1034,10 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
// <4 x i1> -> <4 x i32>.
LegalizeTypeAction TA = getTypeAction(Context, VT);
if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
- RegisterVT = getTypeToTransformTo(Context, VT);
- if (isTypeLegal(RegisterVT)) {
- IntermediateVT = RegisterVT;
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
NumIntermediates = 1;
return 1;
}
@@ -964,7 +1070,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
NewVT = EltTy;
IntermediateVT = NewVT;
- EVT DestVT = getRegisterType(Context, NewVT);
+ MVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
unsigned NewVTSize = NewVT.getSizeInBits();
@@ -972,7 +1078,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
if (!isPowerOf2_32(NewVTSize))
NewVTSize = NextPowerOf2(NewVTSize);
- if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
@@ -984,7 +1090,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI) {
SmallVector<EVT, 4> ValueVTs;
@@ -996,9 +1102,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr & Attribute::SExt)
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr & Attribute::ZExt)
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1006,28 +1112,27 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
// conventions. The frontend should mark functions whose return values
// require promoting with signext or zeroext attributes.
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
if (VT.bitsLT(MinVT))
VT = MinVT;
}
unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr & Attribute::InReg)
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr & Attribute::SExt)
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
Flags.setSExt();
- else if (attr & Attribute::ZExt)
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
- }
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
}
}
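The SExt/ZExt return attributes consulted above decide what lands in the high bits when a narrow return value is widened to a full register. A standalone sketch of the difference, assuming a hypothetical ABI that returns i8 in a 32-bit register:

#include <cassert>
#include <cstdint>

int main() {
  int8_t ret = -1; // an i8 return value of -1 (bit pattern 0xFF)
  // zeroext clears the high 24 bits; signext copies bit 7 into them.
  uint32_t widened_zext = static_cast<uint8_t>(ret);
  uint32_t widened_sext = static_cast<uint32_t>(static_cast<int32_t>(ret));
  assert(widened_zext == 0x000000FFu);
  assert(widened_sext == 0xFFFFFFFFu);
}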
@@ -1061,7 +1166,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
- return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
return Table;
}
@@ -1094,6 +1199,103 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
}
//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLowering::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
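The enum above is generated from llvm/IR/Instruction.def via the X-macro pattern: the .def file invokes HANDLE_INST once per opcode, and each expansion becomes one enumerator. A self-contained sketch of the same technique, with a hypothetical inline list standing in for the .def file:

#include <cassert>

// The list macro plays the role of Instruction.def; names are hypothetical.
#define FOR_EACH_OP(HANDLE) \
  HANDLE(1, OpAdd)          \
  HANDLE(2, OpSub)          \
  HANDLE(3, OpMul)

enum DemoOpcodes {
#define HANDLE_OP(NUM, NAME) NAME = NUM,
  FOR_EACH_OP(HANDLE_OP)
#undef HANDLE_OP
  DemoOpcodesCount // one past the last assigned value, like LAST_OTHER_INST
};

int main() {
  assert(OpAdd == 1 && OpMul == 3);
  assert(DemoOpcodesCount == 4);
}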
+
+std::pair<unsigned, MVT>
+TargetLowering::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
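getTypeLegalizationCost doubles the running cost on every split or expansion step until the type becomes legal. A standalone arithmetic sketch of that loop, assuming a hypothetical target whose widest legal vector is 128 bits:

#include <cassert>

int main() {
  // A 512-bit vector is split twice (512 -> 2x256 -> 4x128); each split
  // doubles the cost, mirroring the TypeSplitVector branch above.
  unsigned Bits = 512, LegalBits = 128, Cost = 1;
  while (Bits > LegalBits) {
    Cost *= 2;
    Bits /= 2;
  }
  assert(Cost == 4 && Bits == LegalBits);
}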
+
+//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
@@ -1157,7 +1359,8 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
@@ -1170,7 +1373,9 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
Op.getNode()->getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
Op.getNode()->getOperand(1)));
- SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ bool NeedZext = DemandedSize > SmallVTBits;
+ SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
+ dl, Op.getValueType(), X);
return CombineTo(Op, Z);
}
}
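The NeedZext test introduced above matters because zero-extension is only required when a demanded bit lies beyond the shrunken type; otherwise ANY_EXTEND suffices and leaves the high bits unconstrained for later folding. A standalone sketch of the decision with concrete numbers (uses the GCC/Clang __builtin_clz intrinsic):

#include <cassert>
#include <cstdint>

int main() {
  // A 32-bit op where only the low 8 bits are demanded, shrunk to i8.
  unsigned BitWidth = 32;
  uint32_t Demanded = 0xFFu;
  unsigned DemandedSize = BitWidth - __builtin_clz(Demanded); // = 8
  unsigned SmallVTBits = 8; // already a power of two
  bool NeedZext = DemandedSize > SmallVTBits;
  // Every demanded bit fits in the small type, so ANY_EXTEND is enough:
  // whatever ends up in the high bits is never observed.
  assert(!NeedZext);
}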
@@ -2106,7 +2311,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
- getCondCodeAction(Cond, newVT)==Legal))
+ getCondCodeAction(Cond, newVT.getSimpleVT())==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
@@ -2202,9 +2407,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
Cond);
- } else if (Op0.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -2219,6 +2425,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(0, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
@@ -2271,7 +2482,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
- else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
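The rewrite above (setugt X, Max-1 into seteq X, Max) holds because Max is the only unsigned value strictly greater than Max-1. A brute-force standalone check over every i8 value:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Max = 0xFF;
  for (unsigned v = 0; v <= 0xFF; ++v) {
    uint8_t x = static_cast<uint8_t>(v);
    // setugt x, Max-1  <==>  seteq x, Max
    assert((x > uint8_t(Max - 1)) == (x == Max));
  }
}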
@@ -2401,36 +2612,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
- if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
// If the comparison was an awkward floating-point == or != and one of
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
if (CFP->getValueAPF().isNegative()) {
if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
} else {
if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
}
}
@@ -2464,7 +2675,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
- getCondCodeAction(NewCond, N0.getValueType()) == Legal))
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2545,7 +2756,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DAG.isCommutativeBinOp(N0.getOpcode()))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
+ if (N0.getNode()->hasOneUse()) {
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
@@ -2561,14 +2772,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
N1.getOpcode() == ISD::XOR) {
// Simplify X == (X+Z) --> Z == 0
- if (N1.getOperand(0) == N0) {
+ if (N1.getOperand(0) == N0)
return DAG.getSetCC(dl, VT, N1.getOperand(1),
DAG.getConstant(0, N1.getValueType()), Cond);
- } else if (N1.getOperand(1) == N0) {
- if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode()))
return DAG.getSetCC(dl, VT, N1.getOperand(0),
DAG.getConstant(0, N1.getValueType()), Cond);
- } else if (N1.getNode()->hasOneUse()) {
+ if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
@@ -2825,6 +3036,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
// Remove the braces from around the name.
StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
// Figure out which register class contains this reg.
const TargetRegisterInfo *RI = TM.getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
@@ -2838,12 +3052,22 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (RegName.equals_lower(RI->getName(*I)))
- return std::make_pair(*I, RC);
+ if (RegName.equals_lower(RI->getName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it.
+ // Otherwise remember the first match and keep searching; it is
+ // returned only if no class explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ if (!R.second)
+ R = S;
+ }
}
}
- return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ return R;
}
//===----------------------------------------------------------------------===//
@@ -2908,10 +3132,10 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
assert(!CS.getType()->isVoidTy() &&
"Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getValueType(CS.getType());
+ OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -2950,13 +3174,14 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
case 64:
case 128:
OpInfo.ConstraintVT =
- EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
- } else if (dyn_cast<PointerType>(OpTy)) {
- OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(
+ 8*TD->getPointerSize(PT->getAddressSpace()));
} else {
- OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
}
}
@@ -3319,7 +3544,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::
BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const {
+ std::vector<SDNode*> *Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3379,7 +3604,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::
BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const {
+ std::vector<SDNode*> *Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index a081e3c..f769b44 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
- : TD(TM.getTargetData()) {
+ : TD(TM.getDataLayout()) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 8a6b120..10f64c7 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -26,12 +26,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "shadowstackgc"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
+#include "llvm/CodeGen/GCs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 4fbe1b3..9ab4918 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -35,22 +35,21 @@
#define DEBUG_TYPE "shrink-wrap"
#include "PrologEpilogInserter.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <sstream>
using namespace llvm;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 7f46a06..09e923c 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,24 +13,24 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sjljehprepare"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -191,7 +191,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
unsigned Align =
- TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
+ TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx =
new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index c98efb4..95faafa 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -143,7 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SlotIndexes::dump() const {
for (IndexList::const_iterator itr = indexList.begin();
itr != indexList.end(); ++itr) {
@@ -170,7 +170,7 @@ void SlotIndex::print(raw_ostream &os) const {
os << "invalid";
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Dump a SlotIndex to stderr.
void SlotIndex::dump() const {
print(dbgs());
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 4cd22eb..209792f 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -10,7 +10,6 @@
#define DEBUG_TYPE "spiller"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -19,12 +18,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 96151b6..0a3818e 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -356,7 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit->anyRematerializable(0);
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SplitEditor::dump() const {
if (RegAssign.empty()) {
dbgs() << " empty\n";
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index dbfa4bb..42502eb 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -22,49 +22,60 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
+#include "llvm/CodeGen/Passes.h"
#include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
DisableColoring("no-stack-coloring",
- cl::init(true), cl::Hidden,
- cl::desc("Suppress stack coloring"));
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
-STATISTIC(NumMarkerSeen, "Number of life markers found.");
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that are broken"));
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas,
+ "Number of allocas that escaped the lifetime region");
//===----------------------------------------------------------------------===//
// StackColoring Pass
@@ -104,7 +115,7 @@ class StackColoring : public MachineFunctionPass {
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
- SlotIndexes* Indexes;
+ SlotIndexes *Indexes;
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
@@ -158,6 +169,14 @@ private:
/// slots to use the joint slots.
void remapInstructions(DenseMap<int, int> &SlotRemap);
+ /// The input program may contain instructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
/// Map entries which point to other entries to their destination.
/// A->B->C becomes A->C.
void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
@@ -241,10 +260,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
MarkersFound++;
- const Value *Allocation = MFI->getObjectAllocation(Slot);
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
- DEBUG(dbgs()<<"Found lifetime marker for allocation: "<<
- Allocation->getName()<<"\n");
+ DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+ " with allocation: "<< Allocation->getName()<<"\n");
}
if (IsStart) {
@@ -461,11 +480,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Keep a list of *allocas* which need to be remapped.
- DenseMap<const Value*, const Value*> Allocas;
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
- const Value *From = MFI->getObjectAllocation(it->first);
- const Value *To = MFI->getObjectAllocation(it->second);
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
assert(To && From && "Invalid allocation object");
Allocas[From] = To;
}
@@ -495,10 +514,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
V = GetUnderlyingObject(V);
// If we did not find one, or if the one that we found is not in our
// map, then move on.
- if (!V || !Allocas.count(V))
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI = cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
continue;
- MMO->setValue(Allocas[V]);
+ MMO->setValue(Allocas[AI]);
FixedMemOp++;
}
@@ -521,12 +547,20 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// In a debug build, check that the instruction that we are modifying is
// inside the expected live range. If the instruction is not inside
// the calculated range then it means that the alloca usage moved
- // outside of the lifetime markers.
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+ // zone are okay, despite the fact that we don't have a good way
+ // to validate all of the usages of the calculation.
#ifndef NDEBUG
- SlotIndex Index = Indexes->getInstructionIndex(I);
- LiveInterval* Interval = Intervals[FromSlot];
- assert(Interval->find(Index) != Interval->end() &&
+ bool TouchesMemory = I->mayLoad() || I->mayStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ LiveInterval *Interval = Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
"Found instruction usage outside of live range.");
+ }
#endif
// Fix the machine instructions.
@@ -541,6 +575,53 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
}
+void StackColoring::removeInvalidSlotRanges() {
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+ continue;
+
+ // Some intervals are suspicious! In some cases we find address
+ // calculations outside of the lifetime zone, but no actual memory
+ // reads or writes. Memory accesses outside of the lifetime zone are a
+ // clear violation, but address calculations are okay. This can happen
+ // when GEPs are hoisted outside of the lifetime zone.
+ // So, here we only check instructions which can read or write memory.
+ if (!I->mayLoad() && !I->mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot < 0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Intervals[Slot]->clear();
+ DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
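The invalidation rule above reduces to an interval-membership test: if any load or store of a slot happens at an index outside the slot's computed live range, the whole range is cleared so the slot can never be merged. A minimal standalone sketch of that rule:

#include <cassert>
#include <utility>
#include <vector>

struct SlotRange {
  std::vector<std::pair<unsigned, unsigned>> Ranges; // [start, end) indexes
  bool contains(unsigned Idx) const {
    for (const auto &R : Ranges)
      if (Idx >= R.first && Idx < R.second)
        return true;
    return false;
  }
};

int main() {
  SlotRange S{{{10, 20}}};
  unsigned AccessIdx = 25; // a memory access outside the lifetime zone
  if (!S.contains(AccessIdx))
    S.Ranges.clear(); // corresponds to Intervals[Slot]->clear() above
  assert(S.Ranges.empty());
}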
+
void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
unsigned NumSlots) {
// Expunge slot remap map.
@@ -596,7 +677,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
// Don't continue because there are not enough lifetime markers, or the
- // stack or too small, or we are told not to optimize the slots.
+ // stack is too small, or we are told not to optimize the slots.
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
DEBUG(dbgs()<<"Will not try to merge slots.\n");
return removeAllMarkers();
@@ -615,6 +696,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Propagate the liveness information.
calculateLiveIntervals(NumSlots);
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
// Maps old slots to new slots.
DenseMap<int, int> SlotRemap;
unsigned RemovedSlots = 0;
@@ -633,7 +719,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// and continue.
// Sort the slots according to their size. Place unused slots at the end.
- std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ SlotSizeSorter(MFI));
bool Changed = true;
while (Changed) {
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index a04ac3f..665388b 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,20 +16,20 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Triple.h"
using namespace llvm;
namespace {
@@ -117,7 +117,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
// If an array has more than SSPBufferSize bytes of allocated space, then we
// emit stack protectors.
- if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT))
+ if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
return true;
}
@@ -137,10 +137,12 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
/// add a guard variable to functions that call alloca, and functions with
/// buffers larger than SSPBufferSize bytes.
bool StackProtector::RequiresStackProtector() const {
- if (F->hasFnAttr(Attribute::StackProtectReq))
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq))
return true;
- if (!F->hasFnAttr(Attribute::StackProtect))
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect))
return false;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 9d0fd0a..f951561 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -11,9 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackcoloring"
-#include "llvm/Module.h"
+#define DEBUG_TYPE "stackslotcoloring"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -22,14 +25,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include <vector>
using namespace llvm;
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 39fd600..b337c53 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -39,17 +39,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "strongphielim"
-#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index a813fa6..1ec8817 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -13,25 +13,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "tailduplication"
-#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumTails , "Number of tails duplicated");
@@ -461,6 +461,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II != EE; ++II) {
if (!II->isPHI())
break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
unsigned Idx = 0;
for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
MachineOperand &MO = II->getOperand(i+1);
@@ -508,8 +509,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II->getOperand(Idx+1).setMBB(SrcBB);
Idx = 0;
} else {
- II->addOperand(MachineOperand::CreateReg(SrcReg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ MIB.addReg(SrcReg).addMBB(SrcBB);
}
}
} else {
@@ -521,8 +521,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II->getOperand(Idx+1).setMBB(SrcBB);
Idx = 0;
} else {
- II->addOperand(MachineOperand::CreateReg(Reg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ MIB.addReg(Reg).addMBB(SrcBB);
}
}
}
@@ -552,7 +551,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cadb878..883e9d1 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -11,12 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#include <cstdlib>
using namespace llvm;
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 7e7f835..d5fbf14 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,39 +7,96 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the TargetInstrInfoImpl class, it just provides default
-// implementations of various methods.
+// This file implements the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
using namespace llvm;
static cl::opt<bool> DisableHazardRecognizer(
"disable-sched-hazard", cl::Hidden, cl::init(false),
cl::desc("Disable hazard detection during preRA scheduling"));
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false; // comments never count as an instruction
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ }
+
+ return Length;
+}
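A worked standalone example of the counting rules above, with the MCAsmInfo parameters stubbed out as: separator "\n", comment string "#", maximum instruction length 4 bytes:

#include <cassert>
#include <cctype>
#include <cstring>

int main() {
  const char *Sep = "\n", *Comment = "#";
  const unsigned MaxInst = 4;
  const char *Str = "mov r0, r1\n# just a comment\nadd r2, r3";
  bool atInsnStart = true;
  unsigned Length = 0;
  for (; *Str; ++Str) {
    if (*Str == '\n' || std::strncmp(Str, Sep, std::strlen(Sep)) == 0)
      atInsnStart = true;
    if (atInsnStart && std::strncmp(Str, Comment, std::strlen(Comment)) == 0)
      atInsnStart = false; // comment line: not an instruction
    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
      Length += MaxInst;
      atInsnStart = false;
    }
  }
  assert(Length == 2 * MaxInst); // two real instructions; comment not counted
}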
+
/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
/// after it, replacing it with an unconditional branch to NewDest.
void
-TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
- MachineBasicBlock *NewDest) const {
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
MachineBasicBlock *MBB = Tail->getParent();
// Remove all the old successors of MBB from the CFG.
@@ -58,8 +115,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// commuteInstruction - The default implementation of this method just exchanges
// the two operands returned by findCommutedOpIndices.
-MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
const MCInstrDesc &MCID = MI->getDesc();
bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
@@ -119,11 +176,11 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
/// findCommutedOpIndices - If specified MI is commutable, return the two
/// operand indices that would swap value. Return true if the instruction
/// is not in a form which this routine understands.
-bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
assert(!MI->isBundle() &&
- "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
@@ -141,7 +198,7 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
bool
-TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
@@ -153,12 +210,12 @@ TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
}
-bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
assert(!MI->isBundle() &&
- "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
const MCInstrDesc &MCID = MI->getDesc();
if (!MI->isPredicable())
@@ -183,9 +240,9 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
return MadeChange;
}
-bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
o != oe;
@@ -201,9 +258,9 @@ bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
return false;
}
-bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
o != oe;
@@ -219,26 +276,26 @@ bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
return false;
}
-void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const {
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
}
bool
-TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const {
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
-MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
- MachineFunction &MF) const {
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
@@ -278,7 +335,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
return 0;
}
-bool TargetInstrInfoImpl::
+bool TargetInstrInfo::
canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
@@ -459,9 +516,9 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
-bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const{
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
// Terminators and labels can't be scheduled around.
if (MI->isTerminator() || MI->isLabel())
return true;
@@ -472,7 +529,8 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
// stack slot reference to depend on the instruction that does the
// modification.
const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
return true;
return false;
@@ -480,12 +538,12 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
// Provide a global flag for disabling the PreRA hazard recognizer that targets
// may choose to honor.
-bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
return !DisableHazardRecognizer;
}
// Default implementation of CreateTargetRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
// Dummy hazard recognizer allows all instructions to issue.
@@ -493,7 +551,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
}
// Default implementation of CreateTargetMIHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
return (ScheduleHazardRecognizer *)
@@ -501,7 +559,7 @@ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
return (ScheduleHazardRecognizer *)
@@ -513,9 +571,9 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
//===----------------------------------------------------------------------===//
int
-TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
if (!ItinData || ItinData->isEmpty())
return -1;
@@ -529,8 +587,8 @@ TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -545,8 +603,8 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
//===----------------------------------------------------------------------===//
unsigned
-TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
@@ -563,6 +621,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
/// Return the default expected latency for a def based on its opcode.
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
if (DefMI->mayLoad())
return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
@@ -570,7 +630,7 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
return 1;
}
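
A standalone sketch of the resulting fallback order (the 4- and 10-cycle figures are invented; only the priority of the checks mirrors defaultDefLatency above):

#include <cassert>

struct SchedModelStub { unsigned LoadLatency = 4; unsigned HighLatency = 10; };

// Mirrors the check order: transient (COPY-like) defs are free, loads use
// the model's LoadLatency, known-slow opcodes use a high default, and
// everything else defaults to a single cycle.
static unsigned defaultDefLatency(const SchedModelStub &SM, bool IsTransient,
                                  bool MayLoad, bool IsHighLatency) {
  if (IsTransient)
    return 0;
  if (MayLoad)
    return SM.LoadLatency;
  if (IsHighLatency)
    return SM.HighLatency;
  return 1;
}

int main() {
  SchedModelStub SM;
  assert(defaultDefLatency(SM, true, true, true) == 0);   // transient wins
  assert(defaultDefLatency(SM, false, true, false) == 4); // load latency
  assert(defaultDefLatency(SM, false, false, false) == 1);
  return 0;
}
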
-unsigned TargetInstrInfoImpl::
+unsigned TargetInstrInfo::
getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -582,9 +642,9 @@ getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI,
- unsigned DefIdx) const {
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
if (!ItinData || ItinData->isEmpty())
return false;
@@ -595,7 +655,7 @@ bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
-int TargetInstrInfoImpl::
+int TargetInstrInfo::
getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
@@ -606,13 +666,13 @@ getOperandLatency(const InstrItineraryData *ItinData,
/// If we can determine the operand latency from the def only, without itinerary
/// lookup, do so. Otherwise return -1.
-static int computeDefOperandLatency(
- const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, bool FindMin) {
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const {
// Let the target hook getInstrLatency handle missing itineraries.
if (!ItinData)
- return TII->getInstrLatency(ItinData, DefMI);
+ return getInstrLatency(ItinData, DefMI);
// Return a latency based on the itinerary properties and defining instruction
// if possible. Some common subtargets don't require per-operand latency,
@@ -621,7 +681,7 @@ static int computeDefOperandLatency(
// If MinLatency is valid, call getInstrLatency. This uses Stage latency if
// it exists before defaulting to MinLatency.
if (ItinData->SchedModel->MinLatency >= 0)
- return TII->getInstrLatency(ItinData, DefMI);
+ return getInstrLatency(ItinData, DefMI);
// If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
// For empty itineraries, short-circuit the check and default to one cycle.
@@ -629,7 +689,7 @@ static int computeDefOperandLatency(
return 1;
}
else if (ItinData->isEmpty())
- return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
+ return defaultDefLatency(ItinData->SchedModel, DefMI);
// ...operand lookup required
return -1;
@@ -652,7 +712,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *UseMI, unsigned UseIdx,
bool FindMin) const {
- int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
if (DefLatency >= 0)
return DefLatency;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2a2fa9e..76c2546 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -13,30 +13,30 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
using namespace dwarf;
@@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getTargetData()->getPointerSize();
+ unsigned Size = TM.getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -88,6 +88,36 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Streamer.EmitSymbolValue(Sym, Size);
}
+const MCExpr *TargetLoweringObjectFileELF::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
// N.B.: The defaults used in here are not the same ones used in MC.
@@ -247,7 +277,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
const char *SizeSpec = ".rodata.str1.";
if (Kind.isMergeable2ByteCString())
@@ -314,35 +344,6 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCExpr *TargetLoweringObjectFileELF::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- unsigned Encoding, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
- MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += ".DW.stub";
-
- // Add information about the stub reference to ELFMMI so that the stub
- // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
- StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
-}
-
const MCSection *
TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
@@ -522,14 +523,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return UStringSection;
if (Kind.isMergeableConst()) {
@@ -604,9 +605,9 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
}
const MCExpr *TargetLoweringObjectFileMachO::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// The mach-o version of this method defaults to returning a stub reference.
if (Encoding & DW_EH_PE_indirect) {
@@ -629,11 +630,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *TargetLoweringObjectFileMachO::
@@ -701,8 +703,19 @@ getCOFFSectionFlags(SectionKind K) {
const MCSection *TargetLoweringObjectFileCOFF::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- return getContext().getCOFFSection(GV->getSection(),
- getCOFFSectionFlags(Kind),
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
Kind);
}
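
A standalone sketch of the resulting COMDAT section naming (the "_" symbol prefix and ".text" section are illustrative; only the "section$symbol" shape, with the symbol's leading prefix character dropped, comes from the code above):

#include <cassert>
#include <string>

// For a weak global, the section is placed in a COMDAT and renamed to
// "<section>$<symbol>", dropping the mangled symbol's first (prefix)
// character, so the linker can fold duplicate definitions.
static std::string comdatSectionName(const std::string &Section,
                                     const std::string &MangledSym) {
  return Section + "$" + MangledSym.substr(1);
}

int main() {
  assert(comdatSectionName(".text", "_foo") == ".text$foo");
  return 0;
}
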
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 0000000..9b776d1
--- /dev/null
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,286 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
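
The case order in PrintReg::print can be modeled standalone (the register-kind classification is stubbed out here; the real code derives it from the register number and the optional TargetRegisterInfo):

#include <cassert>
#include <string>

enum class RegKind { NoReg, StackSlot, Virtual, Physical };

// Mirrors the printing cases above: %noreg, SS#<idx>, %vreg<idx>, then a
// physical register (reduced to the "%physreg<N>" fallback in this model).
static std::string formatReg(RegKind Kind, unsigned Index) {
  switch (Kind) {
  case RegKind::NoReg:     return "%noreg";
  case RegKind::StackSlot: return "SS#" + std::to_string(Index);
  case RegKind::Virtual:   return "%vreg" + std::to_string(Index);
  case RegKind::Physical:  return "%physreg" + std::to_string(Index);
  }
  return "";
}

int main() {
  assert(formatReg(RegKind::Virtual, 7) == "%vreg7");
  assert(formatReg(RegKind::StackSlot, 3) == "SS#3");
  return 0;
}
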
+
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
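
The sub-class mask walk above visits register-class IDs in increasing order, 32 IDs per mask word; a standalone model of that traversal (plain uint32_t words stand in for the sub-class mask):

#include <cassert>
#include <cstdint>
#include <vector>

// Only ever called with a nonzero value.
static unsigned countTrailingZeros(uint32_t V) {
  unsigned N = 0;
  for (; !(V & 1); V >>= 1)
    ++N;
  return N;
}

// Visit the index of every set bit, lowest first, 32 indices per word --
// the same traversal the sub-class mask loop performs.
static std::vector<unsigned> setBitIndices(const std::vector<uint32_t> &Words) {
  std::vector<unsigned> Out;
  for (unsigned W = 0; W < Words.size(); ++W) {
    unsigned Idx = W * 32;
    for (uint32_t Mask = Words[W]; Mask;) {
      unsigned Offset = countTrailingZeros(Mask);
      Out.push_back(Idx + Offset);
      Mask >>= Offset;
      Mask >>= 1; // split shift avoids undefined behavior when Offset == 31
      Idx += Offset + 1;
    }
  }
  return Out;
}

int main() {
  std::vector<unsigned> Bits = setBitIndices({0x00000005u, 0x00000001u});
  assert(Bits.size() == 3 && Bits[0] == 0 && Bits[1] == 2 && Bits[2] == 32);
  return 0;
}
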
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the smallest register class of
+/// the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the smallest register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
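
A worked check of the ordering argument (the two 4-bit masks are invented):

#include <cassert>
#include <cstdint>

int main() {
  // Class IDs are assigned so that larger classes get smaller IDs; the
  // first common set bit therefore names the largest common sub-class.
  uint32_t A = 0xD;        // A's sub-classes: 0, 2, 3
  uint32_t B = 0xE;        // B's sub-classes: 1, 2, 3
  uint32_t Common = A & B; // 0xC: classes 2 and 3 in common
  unsigned First = 0;
  while (!(Common & (1u << First)))
    ++First;
  assert(First == 2); // class 2 is the largest common sub-class
  return 0;
}
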
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
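
The filtering the default hook applies can be summarized standalone (the register numbers and allocation order are arbitrary):

#include <algorithm>
#include <cassert>
#include <set>
#include <vector>

// A hint survives only if it is not reserved and appears in the class's
// allocation order, mirroring the checks above.
static std::vector<unsigned> filterHint(unsigned Phys,
                                        const std::set<unsigned> &Reserved,
                                        const std::vector<unsigned> &Order) {
  std::vector<unsigned> Hints;
  if (Reserved.count(Phys))
    return Hints;
  if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
    return Hints;
  Hints.push_back(Phys);
  return Hints;
}

int main() {
  std::vector<unsigned> Order = {1, 2, 3};
  assert(filterHint(2, {4}, Order).size() == 1); // in order, not reserved
  assert(filterHint(4, {4}, Order).empty());     // reserved register
  assert(filterHint(9, {}, Order).empty());      // not in allocation order
  return 0;
}
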
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 0000000..f31f67d
--- /dev/null
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,306 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+ // Dividend and Divisor will be naturally swapped as needed.
+ while (Divisor) {
+ unsigned Rem = Dividend % Divisor;
+ Dividend = Divisor;
+ Divisor = Rem;
+ }
+ return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+ unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+ assert((LCM >= A && LCM >= B) && "LCM overflow");
+ return LCM;
+}
+
+void TargetSchedModel::init(const MCSchedModel &sm,
+ const TargetSubtargetInfo *sti,
+ const TargetInstrInfo *tii) {
+ SchedModel = sm;
+ STI = sti;
+ TII = tii;
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
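
A worked example of the resource-factor setup (an IssueWidth of 4 and resources with 2 and 3 units are invented numbers):

#include <cassert>

static unsigned gcd(unsigned A, unsigned B) {
  while (B) {
    unsigned Rem = A % B;
    A = B;
    B = Rem;
  }
  return A;
}

static unsigned lcm(unsigned A, unsigned B) {
  return (unsigned)((unsigned long long)A * B / gcd(A, B));
}

int main() {
  unsigned ResourceLCM = 4; // start from IssueWidth, as init() does
  const unsigned Units[] = {2, 3};
  for (unsigned N : Units)
    ResourceLCM = lcm(ResourceLCM, N);
  assert(ResourceLCM == 12);
  assert(ResourceLCM / 4 == 3);        // MicroOpFactor
  assert(ResourceLCM / Units[0] == 6); // factor for the 2-unit resource
  assert(ResourceLCM / Units[1] == 4); // factor for the 3-unit resource
  return 0;
}
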
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// If we can determine the operand latency from the def only, without machine
+/// model or itinerary lookup, do so. Otherwise return -1.
+int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
+ bool FindMin) const {
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is invalid, then use the itinerary for MinLatency. If no
+ // itinerary exists either, then use single cycle latency.
+ if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
+ return 1;
+ }
+ return SchedModel.MinLatency;
+ }
+ else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
+ return TII->defaultDefLatency(&SchedModel, DefMI);
+ }
+ // ...operand lookup required
+ return -1;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
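
A standalone check of the two index mappings (the three-operand shape, one def followed by two register reads, is invented for illustration):

#include <cassert>
#include <vector>

struct OperandStub { bool IsReg, IsDef, ReadsReg; };

// Def index counts only earlier register defs; use index counts only
// earlier register reads. Each is independent of the other kind.
static unsigned defIdx(const std::vector<OperandStub> &Ops, unsigned OperIdx) {
  unsigned N = 0;
  for (unsigned i = 0; i != OperIdx; ++i)
    if (Ops[i].IsReg && Ops[i].IsDef)
      ++N;
  return N;
}

static unsigned useIdx(const std::vector<OperandStub> &Ops, unsigned OperIdx) {
  unsigned N = 0;
  for (unsigned i = 0; i != OperIdx; ++i)
    if (Ops[i].IsReg && Ops[i].ReadsReg)
      ++N;
  return N;
}

int main() {
  // vreg0<def> = ADD vreg1, vreg2 (shape invented for illustration)
  std::vector<OperandStub> Ops = {{true, true, false},
                                  {true, false, true},
                                  {true, false, true}};
  assert(defIdx(Ops, 0) == 0); // operand 0 is def #0
  assert(useIdx(Ops, 1) == 0); // operand 1 is use #0
  assert(useIdx(Ops, 2) == 1); // operand 2 is use #1
  return 0;
}
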
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx,
+ bool FindMin) const {
+
+ int DefLatency = getDefLatency(DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency =
+ TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ TII->defaultDefLatency(&SchedModel, DefMI));
+ return InstrLatency;
+ }
+ assert(!FindMin && hasInstrSchedModel() &&
+ "Expected a SchedModel for this cpu");
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+ std::string Err;
+ raw_string_ostream ss(Err);
+ ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ report_fatal_error(ss.str());
+ }
+#endif
+ return DefMI->isTransient() ? 0 : 1;
+}
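
The write-latency/read-advance combination reduces to simple arithmetic; a sketch with an invented 4-cycle write and 1-cycle read advance (the real code relies on the target's tables to keep the difference sensible, so the floor here is only a guard for this model):

#include <cassert>

// Effective operand latency = the def's write latency minus any read
// advance the consumer gets for that write resource.
static int operandLatency(int WriteCycles, int ReadAdvance) {
  int L = WriteCycles - ReadAdvance;
  return L > 0 ? L : 0;
}

int main() {
  assert(operandLatency(4, 1) == 3); // consumer reads one cycle early
  assert(operandLatency(2, 2) == 0); // fully hidden by the read advance
  return 0;
}
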
+
+unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle())
+ return TII->getInstrLatency(&InstrItins, MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+ }
+ return TII->defaultDefLatency(&SchedModel, MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ // MinLatency == -1 is for in-order processors that always have unit
+ // MinLatency. MinLatency > 0 is for in-order processors with varying min
+ // latencies, but since this is not a RAW dep, we always use unit latency.
+ if (SchedModel.MinLatency != 0)
+ return 1;
+
+ // MinLatency == 0 indicates an out-of-order processor that can dispatch
+ // WAW dependencies in the same cycle.
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+ return computeInstrLatency(DefMI);
+
+ // If we have a per operand scheduling model, check if this def is writing
+ // an unbuffered resource. If so, it is treated like an in-order cpu.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index bd12f92..8e6f809 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -29,26 +29,26 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
@@ -60,116 +60,100 @@ STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
- class TwoAddressInstructionPass : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const InstrItineraryData *InstrItins;
- MachineRegisterInfo *MRI;
- LiveVariables *LV;
- SlotIndexes *Indexes;
- LiveIntervals *LIS;
- AliasAnalysis *AA;
- CodeGenOpt::Level OptLevel;
-
- // DistanceMap - Keep track the distance of a MI from the start of the
- // current basic block.
- DenseMap<MachineInstr*, unsigned> DistanceMap;
-
- // SrcRegMap - A map from virtual registers to physical registers which
- // are likely targets to be coalesced to due to copies from physical
- // registers to virtual registers. e.g. v1024 = move r0.
- DenseMap<unsigned, unsigned> SrcRegMap;
-
- // DstRegMap - A map from virtual registers to physical registers which
- // are likely targets to be coalesced to due to copies to physical
- // registers from virtual registers. e.g. r1 = move v1024.
- DenseMap<unsigned, unsigned> DstRegMap;
-
- /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
- /// during the initial walk of the machine function.
- SmallVector<MachineInstr*, 16> RegSequences;
-
- bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
- unsigned Reg,
- MachineBasicBlock::iterator OldPos);
-
- bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
- unsigned &LastDef);
-
- bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
- MachineInstr *MI, MachineBasicBlock *MBB,
- unsigned Dist);
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB;
+
+ // DistanceMap - Keep track the distance of a MI from the start of the
+ // current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
- bool CommuteInstruction(MachineBasicBlock::iterator &mi,
- MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned RegC, unsigned Dist);
+ // SrcRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies from physical registers to
+ // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
- bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+ // DstRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies to physical registers from
+ // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
- bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned RegA, unsigned RegB, unsigned Dist);
+ bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
- bool isDefTooClose(unsigned Reg, unsigned Dist,
- MachineInstr *MI, MachineBasicBlock *MBB);
+ bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
- bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
- bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist);
- bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
- void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
- void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
- typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
- typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
- bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
- void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
- void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
- /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
- /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
- /// sub-register references of the register defined by REG_SEQUENCE.
- bool EliminateRegSequences();
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist);
- public:
- static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ void scanUses(unsigned DstReg);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ void processCopy(MachineInstr *MI);
- /// runOnMachineFunction - Pass entry point.
- bool runOnMachineFunction(MachineFunction&);
- };
-}
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
char TwoAddressInstructionPass::ID = 0;
INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
@@ -180,13 +164,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
-/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above-mentioned register to reduce
/// register pressure.
-bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
- MachineInstr *MI, unsigned SavedReg,
- MachineBasicBlock::iterator OldPos) {
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
// FIXME: Shouldn't we be trying to do this before we three-addressify the
// instruction? After this transformation is done, we no longer need
// the instruction to be in three-address form.
@@ -299,13 +283,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
return true;
}
-/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
/// def location by reference.
-bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
- MachineBasicBlock *MBB, unsigned Dist,
- unsigned &LastDef) {
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+ unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
@@ -465,10 +448,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
-TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
- unsigned regC,
- MachineInstr *MI, MachineBasicBlock *MBB,
- unsigned Dist) {
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -516,13 +498,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
// If there is a use of regC between its last def (could be livein) and this
// instruction, then bail.
unsigned LastDefC = 0;
- if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ if (!noUseAfterLastDef(regC, Dist, LastDefC))
return false;
// If there is a use of regB between its last def (could be livein) and this
// instruction, then go ahead and make this transformation.
unsigned LastDefB = 0;
- if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
// Since there are no intervening uses for both registers, then commute
@@ -530,13 +512,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
return LastDefB && LastDefC && LastDefC > LastDefB;
}
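
The final tie-breaker can be exercised standalone (the distance values are arbitrary positions within the block; 0 means no def was seen):

#include <cassert>

// Commute when both sources have a known last def in the block and the
// second source (regC) was defined more recently than the first (regB);
// the more recent def gives the shorter live interval to extend.
static bool preferCommute(unsigned LastDefB, unsigned LastDefC) {
  return LastDefB && LastDefC && LastDefC > LastDefB;
}

int main() {
  assert(preferCommute(3, 7));  // regC defined later: commute
  assert(!preferCommute(7, 3)); // regB defined later: keep order
  assert(!preferCommute(0, 5)); // no known def for regB: keep order
  return 0;
}
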
-/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// commuteInstruction - Commute a two-address instruction and update the basic
/// block, distance map, and live variables if needed. Return true if it is
/// successful.
-bool
-TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
- MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned RegC, unsigned Dist) {
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
MachineInstr *MI = mi;
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(MI);
@@ -555,8 +536,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (Indexes)
Indexes->replaceMachineInstrInMaps(MI, NewMI);
- mbbi->insert(mi, NewMI); // Insert the new inst
- mbbi->erase(mi); // Nuke the old inst.
+ MBB->insert(mi, NewMI); // Insert the new inst
+ MBB->erase(mi); // Nuke the old inst.
mi = NewMI;
DistanceMap.insert(std::make_pair(NewMI, Dist));
}
@@ -588,51 +569,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
-/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
/// three address one. Return true if this transformation was successful.
bool
-TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB,
unsigned Dist) {
- MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
- if (NewMI) {
- DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
- bool Sunk = false;
+ // FIXME: Why does convertToThreeAddress() need an iterator reference?
+ MachineFunction::iterator MFI = MBB;
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+ assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ if (!NewMI)
+ return false;
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(mi, NewMI);
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
- if (NewMI->findRegisterUseOperand(RegB, false, TRI))
- // FIXME: Temporary workaround. If the new instruction doesn't
- // uses RegB, convertToThreeAddress must have created more
- // then one instruction.
- Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
- mbbi->erase(mi); // Nuke the old inst.
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // use RegB, convertToThreeAddress must have created more
+ // than one instruction.
+ Sunk = sink3AddrInstruction(NewMI, RegB, mi);
- if (!Sunk) {
- DistanceMap.insert(std::make_pair(NewMI, Dist));
- mi = NewMI;
- nmi = llvm::next(mi);
- }
+ MBB->erase(mi); // Nuke the old inst.
- // Update source and destination register maps.
- SrcRegMap.erase(RegA);
- DstRegMap.erase(RegB);
- return true;
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
}
- return false;
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
}
-/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// scanUses - Scan forward recursively for only uses, update maps if the use
/// is a copy or a two-address instruction.
void
-TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
SmallVector<unsigned, 4> VirtRegPairs;
bool IsDstPhys;
bool IsCopy = false;
@@ -676,7 +657,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
}
}
-/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// processCopy - If the specified instruction is not yet processed, process it
/// if it's a copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
/// point-to maps used to determine future optimizations. e.g.
@@ -688,9 +669,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
/// potentially joined with r1 on the output side. It's worthwhile to commute
/// 'add' to eliminate a copy.
-void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
- MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
if (Processed.count(MI))
return;
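The point-to maps that processCopy and scanUses maintain can be pictured with plain std::maps (a hedged stand-in for the pass's SrcRegMap/DstRegMap members; the register ids below are invented): SrcRegMap records the physreg a vreg was copied from, DstRegMap the physreg it will be copied to, and commuting pays off when it ties the destination to the source whose physreg matches.

#include <cstdio>
#include <map>

int main() {
  std::map<unsigned, unsigned> SrcRegMap, DstRegMap;
  const unsigned r0 = 1, r1 = 2;                 // physregs (invented ids)
  const unsigned v1024 = 1024, v1025 = 1025, v1026 = 1026;
  SrcRegMap[v1024] = r0;  // v1024 = COPY r0     (input side)
  SrcRegMap[v1025] = r1;  // v1025 = COPY r1     (input side)
  DstRegMap[v1026] = r1;  // r1    = COPY v1026  (output side)
  // v1026 = ADD v1024<tied>, v1025: as written, v1026 is tied to v1024,
  // which points at r0, while v1026 itself points at r1.  Commuting the ADD
  // ties v1026 to v1025 instead, whose physreg matches r1, so the output
  // copy can coalesce away.
  bool Commute = SrcRegMap[v1025] == DstRegMap[v1026] &&
                 SrcRegMap[v1024] != DstRegMap[v1026];
  std::printf("commute profitable: %s\n", Commute ? "yes" : "no");
  return 0;
}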
@@ -707,21 +686,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
assert(SrcRegMap[DstReg] == SrcReg &&
"Can't map to two src physical registers!");
- ScanUses(DstReg, MBB, Processed);
+ scanUses(DstReg);
}
Processed.insert(MI);
return;
}
-/// RescheduleMIBelowKill - If there is one more local instruction that reads
+/// rescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
-bool
-TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
// Bail immediately if we don't have LV available. We use it to find kills
// efficiently.
if (!LV)
@@ -853,8 +831,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
/// isDefTooClose - Return true if the re-scheduling will put the given
/// instruction too close to the defs of its register dependencies.
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
- MachineInstr *MI,
- MachineBasicBlock *MBB) {
+ MachineInstr *MI) {
for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
DE = MRI->def_end(); DI != DE; ++DI) {
MachineInstr *DefMI = &*DI;
@@ -873,15 +850,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return false;
}
-/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// rescheduleKillAboveMI - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
/// current two-address instruction in order to eliminate the need for the
/// copy.
-bool
-TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
// Bail immediately if we don't have LV available. We use it to find kills
// efficiently.
if (!LV)
@@ -918,7 +894,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (MO.isUse()) {
if (!MOReg)
continue;
- if (isDefTooClose(MOReg, DI->second, MI, MBB))
+ if (isDefTooClose(MOReg, DI->second, MI))
return false;
if (MOReg == Reg && !MO.isKill())
return false;
@@ -1006,18 +982,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
return true;
}
-/// TryInstructionTransform - For the case where an instruction has a single
+/// tryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy. Returns true if no copy needs to be
/// inserted to untie mi's operands (either because they were untied, or
/// because mi was rescheduled, and will be visited again later).
bool TwoAddressInstructionPass::
-TryInstructionTransform(MachineBasicBlock::iterator &mi,
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1030,7 +1004,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
bool regBKilled = isKilled(MI, regB, MRI, TII);
if (TargetRegisterInfo::isVirtualRegister(regA))
- ScanUses(regA, &*mbbi, Processed);
+ scanUses(regA);
// Check if it is profitable to commute the operands.
unsigned SrcOp1, SrcOp2;
@@ -1051,7 +1025,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
+ else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -1059,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
// If it's profitable to commute, try to do so.
- if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
++NumCommuted;
if (AggressiveCommute)
++NumAggrCommuted;
@@ -1068,7 +1042,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
- if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+ if (rescheduleMIBelowKill(mi, nmi, regB)) {
++NumReSchedDowns;
return true;
}
@@ -1078,7 +1052,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
// Try to convert it.
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
++NumConvertedTo3Addr;
return true; // Done with this instruction.
}
@@ -1087,7 +1061,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule it before this MI if it's legal.
- if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+ if (rescheduleKillAboveMI(mi, nmi, regB)) {
++NumReSchedUps;
return true;
}
@@ -1131,8 +1105,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Tentatively insert the instructions into the block so that they
// look "normal" to the transformation logic.
- mbbi->insert(mi, NewMIs[0]);
- mbbi->insert(mi, NewMIs[1]);
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
<< "2addr: NEW INST: " << *NewMIs[1]);
@@ -1142,8 +1116,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
MachineBasicBlock::iterator NewMI = NewMIs[1];
bool TransformSuccess =
- TryInstructionTransform(NewMI, mi, mbbi,
- NewSrcIdx, NewDstIdx, Dist, Processed);
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
if (TransformSuccess ||
NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
@@ -1378,16 +1351,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MRI->leaveSSA();
TiedOperandMap TiedOperands;
-
- SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
- mbbi != mbbe; ++mbbi) {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MBB = MBBI;
unsigned Dist = 0;
DistanceMap.clear();
SrcRegMap.clear();
DstRegMap.clear();
Processed.clear();
- for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
mi != me; ) {
MachineBasicBlock::iterator nmi = llvm::next(mi);
if (mi->isDebugValue()) {
@@ -1395,13 +1367,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
continue;
}
- // Remember REG_SEQUENCE instructions, we'll deal with them later.
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
if (mi->isRegSequence())
- RegSequences.push_back(&*mi);
+ eliminateRegSequence(mi);
DistanceMap.insert(std::make_pair(mi, ++Dist));
- ProcessCopy(&*mi, &*mbbi, Processed);
+ processCopy(&*mi);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
@@ -1426,8 +1399,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
- TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
- Processed)) {
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
// The tied operands have been eliminated or shifted further down the
// block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
@@ -1465,323 +1437,81 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
- // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
- // SSA form. It's now safe to de-SSA.
- MadeChange |= EliminateRegSequences();
-
return MadeChange;
}
-static void UpdateRegSequenceSrcs(unsigned SrcReg,
- unsigned DstReg, unsigned SubIdx,
- MachineRegisterInfo *MRI,
- const TargetRegisterInfo &TRI) {
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- MO.substVirtReg(DstReg, SubIdx, TRI);
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// %dst:ssub0<def,undef> = COPY %v1
+/// %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
}
-}
-
-// Find the first def of Reg, assuming they are all in the same basic block.
-static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
- SmallPtrSet<MachineInstr*, 8> Defs;
- MachineInstr *First = 0;
- for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
- MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
- First = MI;
- if (!First)
- return 0;
- MachineBasicBlock *MBB = First->getParent();
- MachineBasicBlock::iterator A = First, B = First;
- bool Moving;
- do {
- Moving = false;
- if (A != MBB->begin()) {
- Moving = true;
- --A;
- if (Defs.erase(A)) First = A;
- }
- if (B != MBB->end()) {
- Defs.erase(B);
- ++B;
- Moving = true;
- }
- } while (Moving && !Defs.empty());
- assert(Defs.empty() && "Instructions outside basic block!");
- return First;
-}
-
-/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
-/// EXTRACT_SUBREG from the same register and to the same virtual register
-/// with different sub-register indices, attempt to combine the
-/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
-/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
-/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
-/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
-/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
-/// reg1026 to reg1029.
-void
-TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
- unsigned DstReg) {
- SmallSet<unsigned, 4> Seen;
- for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
- unsigned SrcReg = Srcs[i];
- if (!Seen.insert(SrcReg))
- continue;
-
- // Check that the instructions are all in the same basic block.
- MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
- MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
- if (!SrcDefMI || !DstDefMI ||
- SrcDefMI->getParent() != DstDefMI->getParent())
- continue;
-
- // If there are no other uses than copies which feed into
- // the reg_sequence, then we might be able to coalesce them.
- bool CanCoalesce = true;
- SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
- CanCoalesce = false;
- break;
- }
- SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
- DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
- }
-
- if (!CanCoalesce || SrcSubIndices.size() < 2)
- continue;
-
- // Check that the source subregisters can be combined.
- std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
- unsigned NewSrcSubIdx = 0;
- if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
- NewSrcSubIdx))
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI->getOperand(i);
+ unsigned SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ // Nothing needs to be inserted for <undef> operands.
+ if (UseMO.isUndef())
continue;
- // Check that the destination subregisters can also be combined.
- std::sort(DstSubIndices.begin(), DstSubIndices.end());
- unsigned NewDstSubIdx = 0;
- if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
- NewDstSubIdx))
- continue;
-
- // If neither source nor destination can be combined to the full register,
- // just give up. This could be improved if it ever matters.
- if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
- continue;
-
- // Now that we know that all the uses are extract_subregs and that those
- // subregs can somehow be combined, scan all the extract_subregs again to
- // make sure the subregs are in the right order and can be composed.
- MachineInstr *SomeMI = 0;
- CanCoalesce = true;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- assert(UseMI->isCopy());
- unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
- unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
- assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
- if ((NewDstSubIdx == 0 &&
- TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
- (NewSrcSubIdx == 0 &&
- TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
- CanCoalesce = false;
- break;
- }
- // Keep track of one of the uses. Preferably the first one which has a
- // <def,undef> flag.
- if (!SomeMI || UseMI->getOperand(0).isUndef())
- SomeMI = UseMI;
- }
- if (!CanCoalesce)
- continue;
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
- // Insert a copy to replace the original.
- MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
- SomeMI->getDebugLoc(),
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define |
- getUndefRegState(SomeMI->getOperand(0).isUndef()),
- NewDstSubIdx)
- .addReg(SrcReg, 0, NewSrcSubIdx);
-
- // Remove all the old extract instructions.
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ) {
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (UseMI == CopyMI)
- continue;
- assert(UseMI->isCopy());
- // Move any kills to the new copy or extract instruction.
- if (UseMI->getOperand(1).isKill()) {
- CopyMI->getOperand(1).setIsKill();
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
- }
- UseMI->eraseFromParent();
- }
- }
-}
-
-static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
- MachineRegisterInfo *MRI) {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI != RegSeq && UseMI->isRegSequence())
- return true;
- }
- return false;
-}
-
-/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
-/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
-/// sub-register references of the register defined by REG_SEQUENCE. e.g.
-///
-/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
-/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
-/// =>
-/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool TwoAddressInstructionPass::EliminateRegSequences() {
- if (RegSequences.empty())
- return false;
-
- for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
- MachineInstr *MI = RegSequences[i];
- unsigned DstReg = MI->getOperand(0).getReg();
- if (MI->getOperand(0).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- !(MI->getNumOperands() & 1)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
+
+ // The first def needs an <undef> flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
}
+ DefEmitted = true;
- bool IsImpDef = true;
- SmallVector<unsigned, 4> RealSrcs;
- SmallSet<unsigned, 4> Seen;
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
- // Nothing needs to be inserted for <undef> operands.
- if (MI->getOperand(i).isUndef()) {
- MI->getOperand(i).setReg(0);
- continue;
- }
- unsigned SrcReg = MI->getOperand(i).getReg();
- unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
- unsigned SubIdx = MI->getOperand(i+1).getImm();
- // DefMI of NULL means the value does not have a vreg in this block
- // i.e., its a physical register or a subreg.
- // In either case we force a copy to be generated.
- MachineInstr *DefMI = NULL;
- if (!MI->getOperand(i).getSubReg() &&
- !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DefMI = MRI->getUniqueVRegDef(SrcReg);
- }
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
- if (DefMI && DefMI->isImplicitDef()) {
- DefMI->eraseFromParent();
- continue;
- }
- IsImpDef = false;
-
- // Remember COPY sources. These might be candidate for coalescing.
- if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
- RealSrcs.push_back(DefMI->getOperand(1).getReg());
-
- bool isKill = MI->getOperand(i).isKill();
- if (!DefMI || !Seen.insert(SrcReg) ||
- MI->getParent() != DefMI->getParent() ||
- !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
- !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
- MRI->getRegClass(SrcReg), SubIdx)) {
- // REG_SEQUENCE cannot have duplicated operands, add a copy.
- // Also add an copy if the source is live-in the block. We don't want
- // to end up with a partial-redef of a livein, e.g.
- // BB0:
- // reg1051:10<def> =
- // ...
- // BB1:
- // ... = reg1051:10
- // BB2:
- // reg1051:9<def> =
- // LiveIntervalAnalysis won't like it.
- //
- // If the REG_SEQUENCE doesn't kill its source, keeping live variables
- // correctly up to date becomes very difficult. Insert a copy.
-
- // Defer any kill flag to the last operand using SrcReg. Otherwise, we
- // might insert a COPY that uses SrcReg after is was killed.
- if (isKill)
- for (unsigned j = i + 2; j < e; j += 2)
- if (MI->getOperand(j).getReg() == SrcReg) {
- MI->getOperand(j).setIsKill();
- isKill = false;
- break;
- }
-
- MachineBasicBlock::iterator InsertLoc = MI;
- MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
- MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, SubIdx)
- .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
- MI->getOperand(i).setReg(0);
- if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
- LV->replaceKillInstruction(SrcReg, MI, CopyMI);
- DEBUG(dbgs() << "Inserted: " << *CopyMI);
- }
- }
-
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
- unsigned SrcReg = MI->getOperand(i).getReg();
- if (!SrcReg) continue;
- unsigned SubIdx = MI->getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
- }
-
- // Set <def,undef> flags on the first DstReg def in the basic block.
- // It marks the beginning of the live range. All the other defs are
- // read-modify-write.
- if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = Def->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
- MO.setIsUndef();
- }
- // Make sure there is a full non-subreg imp-def operand on the
- // instruction. This shouldn't be necessary, but it seems that at least
- // RAFast requires it.
- Def->addRegisterDefined(DstReg, TRI);
- DEBUG(dbgs() << "First def: " << *Def);
- }
-
- if (IsImpDef) {
- DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
- MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
- } else {
- DEBUG(dbgs() << "Eliminated: " << *MI);
- MI->eraseFromParent();
- }
-
- // Try coalescing some EXTRACT_SUBREG instructions. This can create
- // INSERT_SUBREG instructions that must have <undef> flags added by
- // LiveIntervalAnalysis, so only run it when LiveVariables is available.
- if (LV)
- CoalesceExtSubRegs(RealSrcs, DstReg);
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
- RegSequences.clear();
- return true;
+ if (!DefEmitted) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
}
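The inlined replacement can be exercised outside LLVM. Below is a hedged, self-contained model of eliminateRegSequence's loop (the Operand struct and the printed pseudo-MIR are inventions; the real code works on MachineOperands): source operands come in (reg, subidx) pairs, each pair becomes one sub-register COPY, a kill flag is deferred to the last use of the same source, the first emitted def gets an <undef> flag, and an all-undef REG_SEQUENCE degenerates to an IMPLICIT_DEF.

#include <cstdio>
#include <vector>

struct Operand { unsigned Reg, SubIdx; bool Kill, Undef; };

static void expandRegSequence(unsigned DstReg, std::vector<Operand> Ops) {
  bool DefEmitted = false;
  for (size_t i = 0, e = Ops.size(); i != e; ++i) {
    Operand &UseMO = Ops[i];
    if (UseMO.Undef)
      continue;                 // nothing to insert for <undef> operands
    // Defer the kill flag to the last operand using the same source, so no
    // COPY uses the register after it was killed.
    bool IsKill = UseMO.Kill;
    if (IsKill)
      for (size_t j = i + 1; j != e; ++j)
        if (Ops[j].Reg == UseMO.Reg) {
          Ops[j].Kill = true;
          UseMO.Kill = IsKill = false;
          break;
        }
    std::printf("%%%u:sub%u<def%s> = COPY %%%u%s\n", DstReg, UseMO.SubIdx,
                DefEmitted ? "" : ",undef",  // the first def starts the range
                UseMO.Reg, IsKill ? "<kill>" : "");
    DefEmitted = true;
  }
  if (!DefEmitted)              // only <undef> sources: no value to copy
    std::printf("%%%u = IMPLICIT_DEF\n", DstReg);
}

int main() {
  // %1031 = REG_SEQUENCE %1029<kill>, sub0, %1029, sub1: the kill flag is
  // deferred from the first use of %1029 to the last one.
  expandRegSequence(1031, {{1029, 0, true, false}, {1029, 1, false, false}});
  return 0;
}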
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 52693f0..a95ebcd 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -21,22 +21,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index bd10a4b..cd012d2 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -17,23 +17,24 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -76,15 +77,22 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
return SS;
}
-unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
- unsigned physReg = Hint.second;
- if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
- physReg = getPhys(physReg);
- if (Hint.first == 0)
- return (TargetRegisterInfo::isPhysicalRegister(physReg))
- ? physReg : 0;
- return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = getPhys(Hint);
+ return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ return hasPhys(Hint.second);
+ return false;
}
int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
@@ -126,7 +134,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
OS << '\n';
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VirtRegMap::dump() const {
print(dbgs());
}
@@ -171,6 +179,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
"Virtual Register Rewriter", false, false)
@@ -183,6 +192,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
AU.addRequired<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -253,9 +264,6 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
-#ifndef NDEBUG
- BitVector Reserved = TRI->getReservedRegs(*MF);
-#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -279,7 +287,7 @@ void VirtRegRewriter::rewrite() {
unsigned PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
- assert(!Reserved.test(PhysReg) && "Reserved register assignment");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
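The switch from getRegAllocPref to two predicates can be sketched stand-alone (a hedged model; std::map stands in for MachineRegisterInfo's hint storage and the >= 1024 virtual-register test is an invented encoding): hasPreferredPhys asks whether VirtReg ended up in the physreg its simple hint names, resolving a virtual hint through the map first, while hasKnownPreference asks only whether the hint is resolvable at all.

#include <cstdio>
#include <map>

static bool isVirtualReg(unsigned R) { return R >= 1024; } // invented encoding

struct MiniVRM {
  std::map<unsigned, unsigned> Virt2Phys; // allocator assignments
  std::map<unsigned, unsigned> Hints;     // "simple" copy hints per vreg

  unsigned getPhys(unsigned V) const {
    auto I = Virt2Phys.find(V);
    return I == Virt2Phys.end() ? 0 : I->second;
  }
  bool hasPreferredPhys(unsigned VirtReg) const {
    auto I = Hints.find(VirtReg);
    if (I == Hints.end())
      return false;
    unsigned Hint = I->second;
    if (isVirtualReg(Hint))
      Hint = getPhys(Hint);     // a virtual hint is resolved through the map
    return Hint && getPhys(VirtReg) == Hint;
  }
  bool hasKnownPreference(unsigned VirtReg) const {
    auto I = Hints.find(VirtReg);
    if (I == Hints.end())
      return false;
    return !isVirtualReg(I->second) || Virt2Phys.count(I->second) != 0;
  }
};

int main() {
  MiniVRM VRM;
  VRM.Virt2Phys[1024] = 5;  // %1024 was assigned physreg 5
  VRM.Hints[1024] = 5;      // ...and was hinted to physreg 5
  VRM.Hints[1025] = 1024;   // %1025 is hinted to follow %1024
  std::printf("%d %d %d\n", VRM.hasPreferredPhys(1024),
              VRM.hasKnownPreference(1025), VRM.hasPreferredPhys(1025));
  return 0;
}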
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
deleted file mode 100644
index c320985..0000000
--- a/lib/CodeGen/VirtRegMap.h
+++ /dev/null
@@ -1,190 +0,0 @@
-//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a virtual register map. This maps virtual registers to
-// physical registers and virtual registers to stack slots. It is created and
-// updated by a register allocator and then used by a machine code rewriter that
-// adds spill code and rewrites virtual into physical register references.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGMAP_H
-#define LLVM_CODEGEN_VIRTREGMAP_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/IndexedMap.h"
-
-namespace llvm {
- class MachineInstr;
- class MachineFunction;
- class MachineRegisterInfo;
- class TargetInstrInfo;
- class raw_ostream;
- class SlotIndexes;
-
- class VirtRegMap : public MachineFunctionPass {
- public:
- enum {
- NO_PHYS_REG = 0,
- NO_STACK_SLOT = (1L << 30)-1,
- MAX_STACK_SLOT = (1L << 18)-1
- };
-
- private:
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineFunction *MF;
-
- /// Virt2PhysMap - This is a virtual to physical register
- /// mapping. Each virtual register is required to have an entry in
- /// it; even spilled virtual registers (the register mapped to a
- /// spilled register is the temporary used to load it from the
- /// stack).
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
-
- /// Virt2StackSlotMap - This is virtual register to stack slot
- /// mapping. Each spilled virtual register has an entry in it
- /// which corresponds to the stack slot this register is spilled
- /// at.
- IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
-
- /// Virt2SplitMap - This is virtual register to splitted virtual register
- /// mapping.
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
-
- /// createSpillSlot - Allocate a spill slot for RC from MFI.
- unsigned createSpillSlot(const TargetRegisterClass *RC);
-
- VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
- void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
-
- public:
- static char ID;
- VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- MachineFunction &getMachineFunction() const {
- assert(MF && "getMachineFunction called before runOnMachineFunction");
- return *MF;
- }
-
- MachineRegisterInfo &getRegInfo() const { return *MRI; }
- const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
-
- void grow();
-
- /// @brief returns true if the specified virtual register is
- /// mapped to a physical register
- bool hasPhys(unsigned virtReg) const {
- return getPhys(virtReg) != NO_PHYS_REG;
- }
-
- /// @brief returns the physical register mapped to the specified
- /// virtual register
- unsigned getPhys(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2PhysMap[virtReg];
- }
-
- /// @brief creates a mapping for the specified virtual register to
- /// the specified physical register
- void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
- TargetRegisterInfo::isPhysicalRegister(physReg));
- assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
- "attempt to assign physical register to already mapped "
- "virtual register");
- Virt2PhysMap[virtReg] = physReg;
- }
-
- /// @brief clears the specified virtual register's, physical
- /// register mapping
- void clearVirt(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
- "attempt to clear a not assigned virtual register");
- Virt2PhysMap[virtReg] = NO_PHYS_REG;
- }
-
- /// @brief clears all virtual to physical register mappings
- void clearAllVirt() {
- Virt2PhysMap.clear();
- grow();
- }
-
- /// @brief returns the register allocation preference.
- unsigned getRegAllocPref(unsigned virtReg);
-
- /// @brief returns true if VirtReg is assigned to its preferred physreg.
- bool hasPreferredPhys(unsigned VirtReg) {
- return getPhys(VirtReg) == getRegAllocPref(VirtReg);
- }
-
- /// @brief records virtReg is a split live interval from SReg.
- void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
- Virt2SplitMap[virtReg] = SReg;
- }
-
- /// @brief returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) const {
- return Virt2SplitMap[virtReg];
- }
-
- /// getOriginal - Return the original virtual register that VirtReg descends
- /// from through splitting.
- /// A register that was not created by splitting is its own original.
- /// This operation is idempotent.
- unsigned getOriginal(unsigned VirtReg) const {
- unsigned Orig = getPreSplitReg(VirtReg);
- return Orig ? Orig : VirtReg;
- }
-
- /// @brief returns true if the specified virtual register is not
- /// mapped to a stack slot or rematerialized.
- bool isAssignedReg(unsigned virtReg) const {
- if (getStackSlot(virtReg) == NO_STACK_SLOT)
- return true;
- // Split register can be assigned a physical register as well as a
- // stack slot or remat id.
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
- }
-
- /// @brief returns the stack slot mapped to the specified virtual
- /// register
- int getStackSlot(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2StackSlotMap[virtReg];
- }
-
- /// @brief create a mapping for the specifed virtual register to
- /// the next available stack slot
- int assignVirt2StackSlot(unsigned virtReg);
- /// @brief create a mapping for the specified virtual register to
- /// the specified stack slot
- void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
-
- void print(raw_ostream &OS, const Module* M = 0) const;
- void dump() const;
- };
-
- inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
- VRM.print(OS);
- return OS;
- }
-} // End llvm namespace
-
-#endif
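The deleted header is presumably relocated rather than gone; the VirtRegMap.cpp hunk above now includes llvm/CodeGen/VirtRegMap.h. The design it documents reduces to two dense vreg-indexed maps, restated here as a hedged miniature (MiniVRM and its members are inventions; the sentinel values are the header's own):

#include <cassert>
#include <cstdio>
#include <vector>

enum { NO_PHYS_REG = 0, NO_STACK_SLOT = (1 << 30) - 1 };

struct MiniVRM {
  // Dense, vreg-indexed maps, mirroring Virt2PhysMap / Virt2StackSlotMap.
  std::vector<unsigned> Virt2Phys;
  std::vector<int> Virt2StackSlot;
  int NextSlot;

  explicit MiniVRM(unsigned NumVirtRegs)
      : Virt2Phys(NumVirtRegs, NO_PHYS_REG),
        Virt2StackSlot(NumVirtRegs, NO_STACK_SLOT), NextSlot(0) {}

  bool hasPhys(unsigned V) const { return Virt2Phys[V] != NO_PHYS_REG; }
  void assignVirt2Phys(unsigned V, unsigned P) {
    assert(!hasPhys(V) && "attempt to assign an already mapped vreg");
    Virt2Phys[V] = P;
  }
  int assignVirt2StackSlot(unsigned V) {
    assert(Virt2StackSlot[V] == NO_STACK_SLOT && "already spilled");
    return Virt2StackSlot[V] = NextSlot++;
  }
};

int main() {
  MiniVRM VRM(4);
  VRM.assignVirt2Phys(0, 7);            // vreg 0 lives in physreg 7
  int SS = VRM.assignVirt2StackSlot(1); // vreg 1 is spilled
  std::printf("phys(0)=%u slot(1)=%d\n", VRM.Virt2Phys[0], SS);
  return 0;
}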