author     Stephen Hines <srhines@google.com>    2013-08-07 15:07:10 -0700
committer  Stephen Hines <srhines@google.com>    2013-08-07 15:07:10 -0700
commit     fab2daa4a1127ecb217abe2b07c1769122b6fee1
tree       268ebfd1963fd98ba412e76819afdf95a7d4267b /lib/CodeGen
parent     8197ac1c1a0a91baa70c4dea8cb488f254ef974c
parent     10251753b6897adcd22cc981c0cc42f348c109de
Merge commit '10251753b6897adcd22cc981c0cc42f348c109de' into merge-20130807
Conflicts:
	lib/Archive/ArchiveReader.cpp
	lib/Support/Unix/PathV2.inc
Change-Id: I29d8c1e321a4a380b6013f00bac6a8e4b593cc4e
Diffstat (limited to 'lib/CodeGen')
95 files changed, 3767 insertions, 2544 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 4731af5..ca08b5b 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -202,161 +202,271 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } static bool isNoopBitcast(Type *T1, Type *T2, - const TargetLowering& TLI) { + const TargetLoweringBase& TLI) { return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || (isa<VectorType>(T1) && isa<VectorType>(T2) && TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); } -/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop -/// (i.e., lowers to no machine code), look through it (and any transitive noop -/// operands to it) and check if it has the same noop input value. This is -/// used to determine if a tail call can be formed. -static bool sameNoopInput(const Value *V1, const Value *V2, - SmallVectorImpl<unsigned> &Els1, - SmallVectorImpl<unsigned> &Els2, - const TargetLowering &TLI) { - using std::swap; - bool swapParity = false; - bool equalEls = Els1 == Els2; +/// Look through operations that will be free to find the earliest source of +/// this value. +/// +/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// scalar component. This records its address; the reverse of this list gives a +/// sequence of indices appropriate for an extractvalue to locate the important +/// value. This value is updated during the function and on exit will indicate +/// similar information for the Value returned. +/// +/// @param DataBits If this function looks through truncate instructions, this +/// will record the smallest size attained. +static const Value *getNoopInput(const Value *V, + SmallVectorImpl<unsigned> &ValLoc, + unsigned &DataBits, + const TargetLoweringBase &TLI) { while (true) { - if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) { - if (swapParity) - // Revert to original Els1 and Els2 to avoid confusing recursive calls - swap(Els1, Els2); - return true; - } - // Try to look through V1; if V1 is not an instruction, it can't be looked // through. - const Instruction *I = dyn_cast<Instruction>(V1); + const Instruction *I = dyn_cast<Instruction>(V); + if (!I || I->getNumOperands() == 0) return V; const Value *NoopInput = 0; - if (I != 0 && I->getNumOperands() > 0) { - Value *Op = I->getOperand(0); - if (isa<TruncInst>(I)) { - // Look through truly no-op truncates. - if (TLI.isTruncateFree(Op->getType(), I->getType())) - NoopInput = Op; - } else if (isa<BitCastInst>(I)) { - // Look through truly no-op bitcasts. - if (isNoopBitcast(Op->getType(), I->getType(), TLI)) - NoopInput = Op; - } else if (isa<GetElementPtrInst>(I)) { - // Look through getelementptr - if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) - NoopInput = Op; - } else if (isa<IntToPtrInst>(I)) { - // Look through inttoptr. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<PtrToIntInst>(I)) { - // Look through ptrtoint. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. 
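
The new getNoopInput above replaces the old two-sided sameNoopInput walk with a single forward trace: keep stripping operations that lower to no machine code (no-op bitcasts, all-zero GEPs, width-preserving inttoptr/ptrtoint, free truncates) and remember the narrowest width seen along the way. A standalone sketch of that loop, using toy node types rather than LLVM IR (Node, Kind, and getNoopInput here are illustrative stand-ins):

#include <algorithm>
#include <climits>
#include <cstdio>

// Toy stand-ins for IR values; not the LLVM API.
enum Kind { Leaf, NoopBitcast, ZeroGEP, FreeTrunc };

struct Node {
  Kind kind;
  unsigned bits;    // result width in bits
  const Node *op;   // single operand, if any
};

// Mirror of the look-through loop: walk operands until the node is not a
// known no-op, tracking the smallest width seen through truncates.
const Node *getNoopInput(const Node *v, unsigned &dataBits) {
  while (true) {
    const Node *noopInput = nullptr;
    switch (v->kind) {
    case NoopBitcast:
    case ZeroGEP:
      noopInput = v->op;                         // free: lowers to no code
      break;
    case FreeTrunc:
      dataBits = std::min(dataBits, v->bits);    // remember surviving width
      noopInput = v->op;
      break;
    case Leaf:
      break;
    }
    if (!noopInput)
      return v;                                  // nothing left to look through
    v = noopInput;
  }
}

int main() {
  Node src = {Leaf, 64, nullptr};
  Node tr  = {FreeTrunc, 32, &src};
  Node bc  = {NoopBitcast, 32, &tr};

  unsigned bits = UINT_MAX;
  const Node *found = getNoopInput(&bc, bits);

  // The bitcast and truncate are looked through; 32 bits survive the path.
  std::printf("found source: %s, surviving bits: %u\n",
              found == &src ? "yes" : "no", bits);
  return 0;
}
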
- if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<CallInst>(I)) { - // Look through call - for (User::const_op_iterator i = I->op_begin(), - // Skip Callee - e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + + Value *Op = I->getOperand(0); + if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<TruncInst>(I) && + TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { + DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call (skipping callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } - } else if (isa<InvokeInst>(I)) { - // Look through invoke - for (User::const_op_iterator i = I->op_begin(), - // Skip BB, BB, Callee - e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke (skipping BB, BB, Callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } } + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { + // Value may come from either the aggregate or the scalar + ArrayRef<unsigned> InsertLoc = IVI->getIndices(); + if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), + ValLoc.rbegin())) { + // The type being inserted is a nested sub-type of the aggregate; we + // have to remove those initial indices to get the location we're + // interested in for the operand. + ValLoc.resize(ValLoc.size() - InsertLoc.size()); + NoopInput = IVI->getInsertedValueOperand(); + } else { + // The struct we're inserting into has the value we're interested in, no + // change of address. 
+ NoopInput = Op; + } + } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + // The part we're interested in will inevitably be some sub-section of the + // previous aggregate. Combine the two paths to obtain the true address of + // our element. + ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); + std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), + std::back_inserter(ValLoc)); + NoopInput = Op; } + // Terminate if we couldn't find anything to look through. + if (!NoopInput) + return V; - if (NoopInput) { - V1 = NoopInput; - continue; - } + V = NoopInput; + } +} + +/// Return true if this scalar return value only has bits discarded on its path +/// from the "tail call" to the "ret". This includes the obvious noop +/// instructions handled by getNoopInput above as well as free truncations (or +/// extensions prior to the call). +static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, + SmallVectorImpl<unsigned> &RetIndices, + SmallVectorImpl<unsigned> &CallIndices, + const TargetLoweringBase &TLI) { - // If we already swapped, avoid infinite loop - if (swapParity) - break; + // Trace the sub-value needed by the return value as far back up the graph as + // possible, in the hope that it will intersect with the value produced by the + // call. In the simple case with no "returned" attribute, the hope is actually + // that we end up back at the tail call instruction itself. + unsigned BitsRequired = UINT_MAX; + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + + // If this slot in the value returned is undef, it doesn't matter what the + // call puts there, it'll be fine. + if (isa<UndefValue>(RetVal)) + return true; + + // Now do a similar search up through the graph to find where the value + // actually returned by the "tail call" comes from. In the simple case without + // a "returned" attribute, the search will be blocked immediately and the loop + // a Noop. + unsigned BitsProvided = UINT_MAX; + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + + // There's no hope if we can't actually trace them to (the same part of!) the + // same value. + if (CallVal != RetVal || CallIndices != RetIndices) + return false; - // Otherwise, swap V1<->V2, Els1<->Els2 - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + // However, intervening truncates may have made the call non-tail. Make sure + // all the bits that are needed by the "ret" have been provided by the "tail + // call". FIXME: with sufficiently cunning bit-tracking, we could look through + // extensions too. + if (BitsProvided < BitsRequired) + return false; + + return true; +} + +/// For an aggregate type, determine whether a given index is within bounds or +/// not. +static bool indexReallyValid(CompositeType *T, unsigned Idx) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return Idx < AT->getNumElements(); + + return Idx < cast<StructType>(T)->getNumElements(); +} + +/// Move the given iterators to the next leaf type in depth first traversal. +/// +/// Performs a depth-first traversal of the type as specified by its arguments, +/// stopping at the next leaf node (which may be a legitimate scalar type or an +/// empty struct or array). +/// +/// @param SubTypes List of the partial components making up the type from +/// outermost to innermost non-empty aggregate. The element currently +/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1). 
+/// +/// @param Path Set of extractvalue indices leading from the outermost type +/// (SubTypes[0]) to the leaf node currently represented. +/// +/// @returns true if a new type was found, false otherwise. Calling this +/// function again on a finished iterator will repeatedly return +/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty +/// aggregate or a non-aggregate +static bool +advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First march back up the tree until we can successfully increment one of the + // coordinates in Path. + while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { + Path.pop_back(); + SubTypes.pop_back(); } - for (unsigned n = 0; n < 2; ++n) { - if (isa<InsertValueInst>(V1)) { - if (isa<StructType>(V1->getType())) { - // Look through insertvalue - unsigned i, e; - for (i = 0, e = cast<StructType>(V1->getType())->getNumElements(); - i != e; ++i) { - const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i); - if (InScalar == 0) - break; - Els1.push_back(i); - if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) { - Els1.pop_back(); - break; - } - Els1.pop_back(); - } - if (i == e) { - if (swapParity) - swap(Els1, Els2); - return true; - } - } - } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) { - const ExtractValueInst *EVI = cast<ExtractValueInst>(V1); - unsigned i = Els1.back(); - // If the scalar value being inserted is an extractvalue of the right - // index from the call, then everything is good. - if (isa<StructType>(EVI->getOperand(0)->getType()) && - EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { - // Look through extractvalue - Els1.pop_back(); - if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { - Els1.push_back(i); - if (swapParity) - swap(Els1, Els2); - return true; - } - Els1.push_back(i); - } - } + // If we reached the top, then the iterator is done. + if (Path.empty()) + return false; + + // We know there's *some* valid leaf now, so march back down the tree picking + // out the left-most element at each node. + ++Path.back(); + Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + while (DeeperType->isAggregateType()) { + CompositeType *CT = cast<CompositeType>(DeeperType); + if (!indexReallyValid(CT, 0)) + return true; + + SubTypes.push_back(CT); + Path.push_back(0); - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + DeeperType = CT->getTypeAtIndex(0U); } - if (swapParity) - swap(Els1, Els2); - return false; + return true; +} + +/// Find the first non-empty, scalar-like type in Next and setup the iterator +/// components. +/// +/// Assuming Next is an aggregate of some kind, this function will traverse the +/// tree from left to right (i.e. depth-first) looking for the first +/// non-aggregate type which will play a role in function return. +/// +/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup +/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first +/// i32 in that type. +static bool firstRealType(Type *Next, + SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First initialise the iterator components to the first "leaf" node + // (i.e. node with no valid sub-type at any index, so {} does count as a leaf + // despite nominally being an aggregate). 
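
advanceToNextLeafType above is a cursor-style depth-first traversal: back out of exhausted aggregates, bump the last index, then dive left-most to the next leaf. The same two phases in a self-contained sketch over a toy type tree (Ty and advance are illustrative stand-ins, not the CompositeType API):

#include <cstdio>
#include <vector>

// Toy aggregate model: a node with no children plays the role of a scalar
// when aggregate is false, and of "{}" when aggregate is true.
struct Ty {
  bool aggregate;
  std::vector<const Ty *> elems;
  bool isAggregate() const { return aggregate; }
};

static bool indexValid(const Ty *t, unsigned idx) {
  return idx < t->elems.size();
}

// Same shape as advanceToNextLeafType: retreat until an index can be bumped,
// then descend left-most until a non-aggregate (or empty aggregate) is found.
static bool advance(std::vector<const Ty *> &subTypes,
                    std::vector<unsigned> &path) {
  while (!path.empty() && !indexValid(subTypes.back(), path.back() + 1)) {
    path.pop_back();
    subTypes.pop_back();
  }
  if (path.empty())
    return false;                 // iterator exhausted

  ++path.back();
  const Ty *deeper = subTypes.back()->elems[path.back()];
  while (deeper->isAggregate()) {
    if (!indexValid(deeper, 0))
      return true;                // empty aggregate counts as a leaf
    subTypes.push_back(deeper);
    path.push_back(0);
    deeper = deeper->elems[0];
  }
  return true;
}

int main() {
  Ty i32   = {false, {}};
  Ty empty = {true, {}};
  Ty inner = {true, {&empty, &i32, &empty}};
  Ty outer = {true, {&inner, &i32}};

  // Prime the cursor at index 0 and descend to the first leaf by hand
  // (firstRealType does this, and additionally skips empty leaves).
  std::vector<const Ty *> subTypes{&outer};
  std::vector<unsigned> path{0};
  while (subTypes.back()->elems[path.back()]->isAggregate() &&
         indexValid(subTypes.back()->elems[path.back()], 0)) {
    subTypes.push_back(subTypes.back()->elems[path.back()]);
    path.push_back(0);
  }
  do {
    std::printf("leaf at depth %zu, index %u\n", path.size(), path.back());
  } while (advance(subTypes, path));
  return 0;
}
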
+ while (Next->isAggregateType() && + indexReallyValid(cast<CompositeType>(Next), 0)) { + SubTypes.push_back(cast<CompositeType>(Next)); + Path.push_back(0); + Next = cast<CompositeType>(Next)->getTypeAtIndex(0U); + } + + // If there's no Path now, Next was originally scalar already (or empty + // leaf). We're done. + if (Path.empty()) + return true; + + // Otherwise, use normal iteration to keep looking through the tree until we + // find a non-aggregate type. + while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + } + + return true; +} + +/// Set the iterator data-structures to the next non-empty, non-aggregate +/// subtype. +bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + do { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + + assert(!Path.empty() && "found a leaf but didn't set the path?"); + } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + + return true; } + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled @@ -422,7 +532,50 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; - // Otherwise, make sure the return value and I have the same value - SmallVector<unsigned, 4> Els1, Els2; - return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); + const Value *RetVal = Ret->getOperand(0), *CallVal = I; + SmallVector<unsigned, 4> RetPath, CallPath; + SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; + + bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); + bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); + + // Nothing's actually returned, it doesn't matter what the callee put there + // it's a valid tail call. + if (RetEmpty) + return true; + + // Iterate pairwise through each of the value types making up the tail call + // and the corresponding return. For each one we want to know whether it's + // essentially going directly from the tail call to the ret, via operations + // that end up not generating any code. + // + // We allow a certain amount of covariance here. For example it's permitted + // for the tail call to define more bits than the ret actually cares about + // (e.g. via a truncate). + do { + if (CallEmpty) { + // We've exhausted the values produced by the tail call instruction, the + // rest are essentially undef. The type doesn't really matter, but we need + // *something*. + Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + CallVal = UndefValue::get(SlotType); + } + + // The manipulations performed when we're looking through an insertvalue or + // an extractvalue would happen at the front of the RetPath list, so since + // we have to copy it anyway it's more efficient to create a reversed copy. + using std::copy; + SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; + copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); + copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + + // Finally, we can check whether the value produced by the tail call at this + // index is compatible with the value we return. 
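
Putting the pieces together, each slot passes only if both traces converge on the same part of the same value and no bits the ret needs were lost on the call's side. A compact standalone restatement of that check (Traced and slotCompatible are hypothetical names):

#include <cassert>
#include <vector>

// Sketch of the final per-slot test: both traces must land on (the same part
// of) the same value, and the call must provide at least as many bits as the
// ret consumes; truncation along the path is fine, widening is not.
struct Traced {
  const void *source;           // result of the look-through walk
  std::vector<unsigned> path;   // reversed extractvalue indices
  unsigned bits;                // narrowest width seen along the walk
};

static bool slotCompatible(const Traced &ret, const Traced &call) {
  if (ret.source != call.source || ret.path != call.path)
    return false;
  return call.bits >= ret.bits;
}

int main() {
  int dummy;
  Traced ret  = {&dummy, {0, 1}, 32};
  Traced call = {&dummy, {0, 1}, 64};
  assert(slotCompatible(ret, call));   // 64 bits provided, 32 needed
  return 0;
}
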
+ if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI)) + return false; + + CallEmpty = !nextRealType(CallSubTypes, CallPath); + } while(nextRealType(RetSubTypes, RetPath)); + + return true; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 69ff329..12c3574 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -49,9 +50,9 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static const char *DWARFGroupName = "DWARF Emission"; -static const char *DbgTimerName = "DWARF Debug Writer"; -static const char *EHTimerName = "DWARF Exception Writer"; +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const EHTimerName = "DWARF Exception Writer"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -97,7 +98,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; + DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); @@ -154,8 +155,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { - OutStreamer.InitStreamer(); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MMI->AnalyzeModule(M); @@ -163,6 +162,8 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); + OutStreamer.InitStreamer(); + Mang = new Mangler(OutContext, &TM); // Allow the target to emit any magic that it wants at the start of the file. @@ -367,9 +368,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - if (GVKind.isThreadBSS()) + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); - else if (GVKind.isThreadData()) { + } else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); EmitAlignment(AlignLog, GV); @@ -395,7 +397,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize); + PtrSize); OutStreamer.EmitIntValue(0, PtrSize); OutStreamer.EmitSymbolValue(MangSym, PtrSize); @@ -562,10 +564,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // cast away const; DIetc do not take const operands for some reason. 
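
The `static const char *const` change near the top of the AsmPrinter.cpp hunk is a small const-correctness fix: with only the first const, the characters are protected but the pointer variable itself remains writable (and lands in a writable data section).

// Why the patch adds the second const:
static const char *Name1 = "DWARF Emission";        // pointer still mutable
static const char *const Name2 = "DWARF Emission";  // pointer frozen too

int main() {
  Name1 = "oops";    // legal: only the pointee was const
  // Name2 = "oops"; // error: assignment of read-only variable
  return 0;
}
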
DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); - if (V.getContext().isSubprogram()) - OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + if (V.getContext().isSubprogram()) { + StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (!Name.empty()) + OS << Name << ":"; + } OS << V.getName() << " <- "; + // The second operand is only an offset if it's an immediate. + bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -586,18 +595,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); } else { - assert(MI->getOperand(0).isReg() && "Unknown operand type"); - if (MI->getOperand(0).getReg() == 0) { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.TM.getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + Deref = true; + } + if (Reg == 0) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } - OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + if (Deref) + OS << '['; + OS << AP.TM.getRegisterInfo()->getName(Reg); } - OS << '+' << MI->getOperand(1).getImm(); + if (Deref) + OS << '+' << Offset << ']'; + // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; @@ -790,16 +812,9 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// getDebugValueLocation - Get location information encoded by DBG_VALUE -/// operands. -MachineLocation AsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - // Target specific DBG_VALUE instructions are handled by each target. - return MachineLocation(); -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); @@ -817,7 +832,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (MLoc.isIndirect()) { + if (MLoc.isIndirect() || Indirect) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -828,7 +843,9 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(MLoc.getOffset()); + EmitSLEB128(!MLoc.isIndirect() ? 
0 : MLoc.getOffset()); + if (MLoc.isIndirect() && Indirect) + EmitInt8(dwarf::DW_OP_deref); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -1400,6 +1417,10 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size) const { + if (MAI->needsDwarfSectionOffsetDirective() && Size == 4) { // secrel32 ONLY works for 32bits. + OutStreamer.EmitCOFFSecRel32(Label); + return; + } // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); @@ -1561,8 +1582,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, - AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1624,7 +1644,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - unsigned AddrSpace,AsmPrinter &AP){ + AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); @@ -1632,12 +1652,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + return AP.OutStreamer.EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + return AP.OutStreamer.EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. 
unsigned ElementByteSize = CDS->getElementByteSize(); @@ -1647,7 +1667,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize, AddrSpace); + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1662,7 +1682,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1675,7 +1695,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 8); } } @@ -1684,41 +1704,38 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + AP.OutStreamer.EmitFill(Bytes, Value); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AP); } } -static void emitGlobalConstantVector(const ConstantVector *CV, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AP); const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CV->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! const DataLayout *TD = AP.TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(CS->getType()); @@ -1734,19 +1751,18 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. 
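
emitGlobalConstantStruct keeps a running SizeSoFar and pads each field out to the next field's offset, so the bytes emitted always agree with the DataLayout's idea of the struct size. The arithmetic in a standalone sketch, with made-up offsets and sizes:

#include <cstdio>

int main() {
  const unsigned offsets[] = {0, 4, 16};  // field offsets from a layout
  const unsigned sizes[]   = {1, 8, 4};   // field store sizes
  const unsigned total     = 24;          // alloc size of the whole struct

  unsigned sizeSoFar = 0;
  for (unsigned i = 0; i < 3; ++i) {
    unsigned next = (i + 1 < 3) ? offsets[i + 1] : total;
    unsigned pad  = next - offsets[i] - sizes[i];
    sizeSoFar += sizes[i] + pad;
    std::printf("field %u: emit %u bytes, then %u zero pad bytes\n",
                i, sizes[i], pad);
  }
  // Mirrors the assert in the hunk: we must land exactly on the struct size.
  return sizeSoFar == total ? 0 : 1;
}
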
- emitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + AP.OutStreamer.EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); } -static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { APInt API = CFP->getValueAPF().bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -1772,27 +1788,26 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. const DataLayout &TD = *AP.TM.getDataLayout(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); + TD.getTypeStoreSize(CFP->getType())); } -static void emitGlobalConstantLargeInt(const ConstantInt *CI, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { const DataLayout *TD = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); @@ -1832,7 +1847,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { uint64_t Val = TD->isBigEndian() ? 
RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(Val, 8); } if (ExtraBitsSize) { @@ -1844,16 +1859,15 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) == ExtraBits && "Directive too small for extra bits."); - AP.OutStreamer.EmitIntValue(ExtraBits, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { const DataLayout *TD = AP.TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size, AddrSpace); + return AP.OutStreamer.EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1864,36 +1878,36 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); return; default: - emitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return emitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(0, Size); return; } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have @@ -1901,27 +1915,27 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, // that way. Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) - return emitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. 
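
emitGlobalConstantLargeInt walks the APInt's 64-bit words in storage order on little-endian targets and in reverse on big-endian ones, since the words are held little-endian in memory but must be emitted lowest-address-first. A minimal sketch of that loop (emitWords stands in for the streamer calls):

#include <cstdint>
#include <cstdio>

static void emitWords(const uint64_t *raw, unsigned words, bool bigEndian) {
  for (unsigned i = 0; i != words; ++i) {
    // On big-endian targets the most significant word goes out first.
    uint64_t val = bigEndian ? raw[words - i - 1] : raw[i];
    std::printf("  .quad 0x%016llx\n", (unsigned long long)val);
  }
}

int main() {
  const uint64_t v128[] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
  std::puts("little-endian:"); emitWords(v128, 2, false);
  std::puts("big-endian:");    emitWords(v128, 2, true);
  return 0;
}
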
-void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1, AddrSpace); + OutStreamer.EmitIntValue(0, 1); } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index e6d67e8..c141d60 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -33,7 +33,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -41,7 +41,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { } /// EmitULEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index abfa330..d8e9c95 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -213,7 +213,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. - + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, /*Modifier*/ 0, OS); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 8d15c06..65e7bee 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -13,3 +13,5 @@ add_llvm_library(LLVMAsmPrinter OcamlGCPrinter.cpp Win64Exception.cpp ) + +add_dependencies(LLVMAsmPrinter intrinsics_gen) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 5f451a7..ab03861 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +24,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -120,6 +122,18 @@ DIE *DIE::getCompileUnit() { llvm_unreachable("We should not have orphaned DIEs."); } +DIEValue *DIE::findAttribute(unsigned Attribute) { + const SmallVectorImpl<DIEValue *> &Values = getValues(); + const DIEAbbrev &Abbrevs = getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. 
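
The AsmPrinterDwarf.cpp hunk widens EmitSLEB128/EmitULEB128 to 64-bit parameters; LEB128 is variable-length, so the wider type only changes how many 7-bit groups can be produced. Standalone encoders for both forms, independent of LLVM's streamer:

#include <cstdint>
#include <cstdio>
#include <vector>

static void encodeULEB128(uint64_t value, std::vector<uint8_t> &out) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0)
      byte |= 0x80;              // continuation bit: more bytes follow
    out.push_back(byte);
  } while (value != 0);
}

static void encodeSLEB128(int64_t value, std::vector<uint8_t> &out) {
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;                 // arithmetic shift preserves the sign
    // Done once the remaining bits are pure sign extension of the payload.
    if ((value == 0 && !(byte & 0x40)) || (value == -1 && (byte & 0x40)))
      more = false;
    else
      byte |= 0x80;
    out.push_back(byte);
  }
}

int main() {
  std::vector<uint8_t> buf;
  encodeULEB128(624485, buf);    // classic example: e5 8e 26
  encodeSLEB128(-123456, buf);   // c0 bb 78
  for (uint8_t b : buf)
    std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}
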
+ for (size_t i = 0; i < Values.size(); ++i) + if (Abbrevs.getData()[i].getAttribute() == Attribute) + return Values[i]; + return NULL; +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IndentCount) const { const std::string Indent(IndentCount, ' '); @@ -247,13 +261,39 @@ void DIEInteger::print(raw_ostream &O) const { #endif //===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. +/// +unsigned DIEExpr::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIEExpr::print(raw_ostream &O) const { + O << "Expr: "; + Expr->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIELabel Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit label value. /// void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); + AP->EmitLabelReference(Label, SizeOf(AP, Form)); } /// SizeOf - Determine size of label value in bytes. @@ -296,6 +336,29 @@ void DIEDelta::print(raw_ostream &O) const { #endif //===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { + Access->EmitValue(AP, Form); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEString::SizeOf(AsmPrinter *AP, unsigned Form) const { + return Access->SizeOf(AP, Form); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) const { + O << "String: " << Str << "\tSymbol: "; + Access->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// @@ -305,6 +368,16 @@ void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { AP->EmitInt32(Entry->getOffset()); } +unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { + // DWARF4: References that use the attribute form DW_FORM_ref_addr are + // specified to be four bytes in the DWARF 32-bit format and eight bytes + // in the DWARF 64-bit format, while DWARF Version 2 specifies that such + // references have the same size as an address on the target system. 
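
getRefAddrSize encodes a version-dependent rule from the DWARF spec: version 2 made DW_FORM_ref_addr address-sized, while later versions tie it to the DWARF format instead (4 bytes in 32-bit DWARF, which is all this hunk handles). Reduced to a pure function (refAddrSize is a hypothetical name):

#include <cassert>

static unsigned refAddrSize(unsigned dwarfVersion, unsigned pointerSize) {
  return dwarfVersion == 2 ? pointerSize : 4u;
}

int main() {
  assert(refAddrSize(2, 8) == 8);  // DWARF 2: pointer-sized
  assert(refAddrSize(4, 8) == 4);  // DWARF 4, 32-bit format: 4 bytes
  return 0;
}
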
+ if (AP->getDwarfDebug()->getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); +} + #ifndef NDEBUG void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)Entry); diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index c29144d..bfd7d1d 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -18,11 +18,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { class AsmPrinter; class MCSymbol; + class MCSymbolRefExpr; class raw_ostream; //===--------------------------------------------------------------------===// @@ -173,6 +175,10 @@ namespace llvm { Child->Parent = this; } + /// findAttribute - Find a value in the DIE with the attribute given, returns NULL + /// if no such attribute exists. + DIEValue *findAttribute(unsigned Attribute); + #ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); @@ -188,6 +194,7 @@ namespace llvm { enum { isInteger, isString, + isExpr, isLabel, isDelta, isEntry, @@ -261,7 +268,35 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIELabel - A label expression DIE. + /// DIEExpr - An expression DIE. + // + class DIEExpr : public DIEValue { + const MCExpr *Expr; + public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} + + /// EmitValue - Emit expression value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) const; +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label DIE. // class DIELabel : public DIEValue { const MCSymbol *Label; @@ -315,6 +350,36 @@ namespace llvm { }; //===--------------------------------------------------------------------===// + /// DIEString - A container for string values. + /// + class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + + public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} + + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } + + #ifndef NDEBUG + virtual void print(raw_ostream &O) const; + #endif + }; + + //===--------------------------------------------------------------------===// /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) @@ -334,9 +399,13 @@ namespace llvm { /// SizeOf - Determine size of debug information entry in bytes. 
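
The new DIEExpr and DIEString classes in DIE.h plug into LLVM's hand-rolled RTTI: each subclass stamps a kind tag in its constructor and exposes a static classof, which isa<> and dyn_cast<> consult. The pattern in miniature, with a toy dynCast standing in for llvm::dyn_cast:

#include <cassert>

struct Value {
  enum Kind { IntegerKind, ExprKind, StringKind };
  explicit Value(Kind k) : kind(k) {}
  Kind getKind() const { return kind; }
private:
  const Kind kind;    // set once at construction, drives all casts
};

struct Expr : Value {
  Expr() : Value(ExprKind) {}
  static bool classof(const Value *v) { return v->getKind() == ExprKind; }
};

// A minimal dyn_cast built purely on classof.
template <typename To, typename From> To *dynCast(From *v) {
  return To::classof(v) ? static_cast<To *>(v) : nullptr;
}

int main() {
  Expr e;
  Value *v = &e;
  assert(dynCast<Expr>(v) != nullptr);
  return 0;
}
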
/// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { - return sizeof(int32_t); + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) : + sizeof(int32_t); } + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); + // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isEntry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c3b6c10..df8ca17 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -131,21 +132,24 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// into another table which gets us the static offset into the string /// table. void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + DIEValue *Value; + unsigned Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); - DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + Form = dwarf::DW_FORM_strp; } else { unsigned idx = DU->getStringPoolIndex(String); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + Value = new (DIEValueAllocator) DIEInteger(idx); + Form = dwarf::DW_FORM_GNU_str_index; } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, Form, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed @@ -163,6 +167,14 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void CompileUnit::addExpr(DIE *Die, unsigned Attribute, unsigned Form, + const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue(Attribute, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. /// void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, @@ -194,16 +206,13 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { - +void CompileUnit::addOpAddress(DIE *Die, const MCSymbol *Sym) { if (!DD->useSplitDwarf()) { addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); } else { - unsigned idx = DU->getAddrPoolIndex(Sym); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + addUInt(Die, 0, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } @@ -235,7 +244,7 @@ void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// entry. void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. 
- if (!V.Verify()) + if (!V.isVariable()) return; unsigned Line = V.getLineNumber(); @@ -253,7 +262,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G.Verify()) + if (!G.isGlobalVariable()) return; unsigned Line = G.getLineNumber(); @@ -270,7 +279,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP.Verify()) + if (!SP.isSubprogram()) return; // If the line number is 0, don't add it. @@ -289,7 +298,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isType()) return; unsigned Line = Ty.getLineNumber(); @@ -306,7 +315,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { /// entry. void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isObjCProperty()) return; unsigned Line = Ty.getLineNumber(); @@ -341,14 +350,15 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location) { - if (DV->variableHasComplexAddress()) + if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) + else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location); + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); } /// addRegisterOp - Add register operand. @@ -384,13 +394,17 @@ void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, /// addAddress - Add an address attribute to a die based on the location /// provided. void CompileUnit::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { + const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) + if (Location.isReg() && !Indirect) addRegisterOp(Block, Location.getReg()); - else + else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } // Now attach the location information to the DIE. addBlock(Die, Attribute, 0, Block); @@ -401,17 +415,17 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. /// -void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned N = DV->getNumAddrElements(); + unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { - if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
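
addRegisterOffset/addAddress build DWARF location expressions: DW_OP_breg0+reg (or DW_OP_bregx for large register numbers) followed by an SLEB128 offset, with a trailing DW_OP_deref when Indirect asks for a load through the computed address. A self-contained sketch that emits the raw bytes (the opcode values come from the DWARF spec; appendRegOffset is an illustrative name):

#include <cstdint>
#include <cstdio>
#include <vector>

enum : uint8_t { DW_OP_deref = 0x06, DW_OP_breg0 = 0x70, DW_OP_bregx = 0x92 };

static void appendSLEB128(int64_t v, std::vector<uint8_t> &out) {
  bool more = true;
  while (more) {
    uint8_t byte = v & 0x7f;
    v >>= 7;
    more = !((v == 0 && !(byte & 0x40)) || (v == -1 && (byte & 0x40)));
    if (more) byte |= 0x80;
    out.push_back(byte);
  }
}

static void appendRegOffset(unsigned reg, int64_t offset, bool deref,
                            std::vector<uint8_t> &expr) {
  if (reg < 32) {
    expr.push_back(DW_OP_breg0 + reg);   // dedicated opcode per register
  } else {
    expr.push_back(DW_OP_bregx);
    // (the register number would follow as ULEB128; elided in this sketch)
  }
  appendSLEB128(offset, expr);
  if (deref)
    expr.push_back(DW_OP_deref);         // load through reg + offset
}

int main() {
  std::vector<uint8_t> expr;
  appendRegOffset(6, -8, true, expr);    // e.g. [frame reg - 8], then deref
  for (uint8_t b : expr) std::printf("%02x ", b);
  std::printf("\n");                     // prints: 76 78 06
  return 0;
}
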
- addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); i = 2; } else addRegisterOp(Block, Location.getReg()); @@ -420,10 +434,10 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, addRegisterOffset(Block, Location.getReg(), Location.getOffset()); for (;i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); + uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); @@ -494,15 +508,15 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { - DIType Ty = DV->getType(); + DIType Ty = DV.getType(); DIType TmpTy = Ty; unsigned Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = DV->getName(); + StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { DIDerivedType DTy = DIDerivedType(Ty); @@ -586,7 +600,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, +void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { // FIXME: This is a bit conservative/simple - it emits negative values at // their maximum bit width which is a bit unfortunate (& doesn't prefer @@ -607,11 +621,10 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, : addUInt(Block, 0, Form, MO.getImm()); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { +void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -632,23 +645,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, +void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - return addConstantValue(Die, CI->getValue(), Unsigned); + addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. 
-bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, - bool Unsigned) { +void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { unsigned form = 0; @@ -664,7 +675,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); else addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); - return true; + return; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -686,7 +697,6 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, } addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; } /// addTemplateParams - Add template parameters into buffer. @@ -727,7 +737,7 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { /// given DIType. DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DIType Ty(TyNode); - if (!Ty.Verify()) + if (!Ty.isType()) return NULL; DIE *TyDIE = getDIE(Ty); if (TyDIE) @@ -766,7 +776,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { /// addType - Add a new type attribute to the specified entity. void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.Verify()) + if (!Ty.isType()) return; // Check for pre-existence. @@ -811,7 +821,7 @@ void CompileUnit::addPubTypes(DISubprogram SP) { DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) + if (!ATy.isType()) continue; addGlobalType(ATy); } @@ -827,7 +837,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // Unspecified types has only name, nothing else. + // An unspecified type only has a name attribute. return; } @@ -871,6 +881,39 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addSourceLine(&Buffer, DTy); } +/// Return true if the type is appropriately scoped to be contained inside +/// its own type unit. +static bool isTypeUnitScoped(DIType Ty) { + DIScope Parent = Ty.getContext(); + while (Parent) { + // Don't generate a hash for anything scoped inside a function. + if (Parent.isSubprogram()) + return false; + Parent = Parent.getContext(); + } + return true; +} + +/// Return true if the type should be split out into a type unit. +static bool shouldCreateTypeUnit(DICompositeType CTy) { + unsigned Tag = CTy.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + // If this is a class, structure, union, or enumeration type + // that is not a declaration, is a type definition, and not scoped + // inside a function then separate this out as a type unit. + if (CTy.isForwardDecl() || !isTypeUnitScoped(CTy)) + return 0; + return 1; + default: + return 0; + } +} + /// constructTypeDIE - Construct type DIE from DICompositeType. void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. 
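
isTypeUnitScoped above is a plain parent-chain walk: any enclosing subprogram disqualifies the type from getting its own type unit. The same walk over a toy scope type (Scope here is a stand-in, not the DIScope API):

struct Scope {
  bool isFunction;
  const Scope *parent;
};

static bool typeUnitScoped(const Scope *s) {
  for (; s; s = s->parent)
    if (s->isFunction)
      return false;   // function-local types never get their own type unit
  return true;
}

int main() {
  Scope tu   = {false, nullptr};   // translation unit
  Scope func = {true, &tu};        // a subprogram
  Scope cls  = {false, &func};     // class defined inside the function
  return typeUnitScoped(&cls) ? 1 : 0;  // expect 0: not eligible
}
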
@@ -897,7 +940,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       }
     }
     DIType DTy = CTy.getTypeDerivedFrom();
-    if (DTy.Verify()) {
+    if (DTy.isType()) {
       addType(&Buffer, DTy);
       addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
     }
@@ -937,16 +980,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
   case dwarf::DW_TAG_structure_type:
   case dwarf::DW_TAG_union_type:
   case dwarf::DW_TAG_class_type: {
-    // Add elements to structure type.
-    DIArray Elements = CTy.getTypeArray();
-
-    // A forward struct declared type may not have elements available.
-    unsigned N = Elements.getNumElements();
-    if (N == 0)
+    if (CTy.isForwardDecl())
       break;

     // Add elements to structure type.
-    for (unsigned i = 0; i < N; ++i) {
+    DIArray Elements = CTy.getTypeArray();
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
       DIDescriptor Element = Elements.getElement(i);
       DIE *ElemDie = NULL;
       if (Element.isSubprogram()) {
@@ -1019,10 +1058,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     if (DIDescriptor(ContainingType).isCompositeType())
       addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
                   getOrCreateTypeDIE(DIType(ContainingType)));
-    else {
-      DIDescriptor Context = CTy.getContext();
-      addToContextOwner(&Buffer, Context);
-    }
+    else
+      addToContextOwner(&Buffer, CTy.getContext());

     if (CTy.isObjcClassComplete())
       addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
@@ -1070,6 +1107,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
               dwarf::DW_FORM_data1, RLang);
   }
+  // If this type qualifies for a type unit, add it to the list of types
+  // we'll compute a hash for later.
+  if (shouldCreateTypeUnit(CTy))
+    DD->addTypeUnitType(&Buffer);
 }

 /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
@@ -1082,7 +1123,8 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
   ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);

   addType(ParamDIE, TP.getType());
-  addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+  if (!TP.getName().empty())
+    addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
   return ParamDIE;
 }

@@ -1094,7 +1136,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
   if (ParamDIE)
     return ParamDIE;

-  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+  ParamDIE = new DIE(TPV.getTag());

   addType(ParamDIE, TPV.getType());
   if (!TPV.getName().empty())
     addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
@@ -1110,6 +1152,14 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
       // parameter, rather than a pointer to it.
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, 0, Block); + } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + assert(isa<MDNode>(Val)); + DIArray A(cast<MDNode>(Val)); + addTemplateParams(*ParamDIE, A); } } @@ -1280,7 +1330,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { return; DIGlobalVariable GV(N); - if (!GV.Verify()) + if (!GV.isGlobalVariable()) return; DIDescriptor GVContext = GV.getContext(); @@ -1332,7 +1382,29 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + const MCSymbol *Sym = Asm->Mang->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + const MCExpr *Expr = + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(Block, 0, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) address of the TLS variable + addExpr(Block, 0, dwarf::DW_FORM_udata, Expr); + } else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, 0, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + } + // 3) followed by a custom OP to tell the debugger about TLS (presumably) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_lo_user); + } else + addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && @@ -1394,8 +1466,6 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) addAccelName(GV.getLinkageName(), AddrDIE); } - - return; } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. @@ -1484,7 +1554,8 @@ void CompileUnit::constructContainingTypeDIEs() { } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { +DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, + bool isScopeAbstract) { StringRef Name = DV->getName(); // Translate tag to proper Dwarf tag. @@ -1515,59 +1586,31 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); + addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. 
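Stepping back to the thread-local storage handling in createGlobalVariableDIE above: the location block it builds is a DWARF expression of the form DW_OP_constNu (const4u or const8u, matching the pointer size), followed by the relocated address of the TLS symbol, followed by a vendor opcode the debugger interprets as a TLS lookup. The following standalone sketch (not part of this commit) shows the resulting byte layout for the non-split-dwarf path on a 64-bit target; it writes a literal address where the real code emits a relocation through addExpr:

#include <cstdint>
#include <vector>

// Illustrative only; opcode values are taken from the DWARF standard.
std::vector<uint8_t> sketchTLSLocationExpr(uint64_t RelocatedAddr) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x0e);              // DW_OP_const8u (0x0c is DW_OP_const4u)
  for (unsigned i = 0; i != 8; ++i)  // 8-byte little-endian operand
    Expr.push_back(uint8_t(RelocatedAddr >> (8 * i)));
  Expr.push_back(0xe0);              // DW_OP_lo_user: custom TLS-lookup opcode
  return Expr;
}

In the split-dwarf branch the constant operand is replaced by DW_OP_GNU_const_index plus an index into the address pool, but the trailing custom opcode is the same.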
if (const MachineInstr *DVInsn = DV->getMInsn()) { - bool updated = false; - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); - } else { - addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } + assert(DVInsn->getNumOperands() == 3); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. + if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm()); + addVariableAddress(*DV, VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(*DV, VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0), DV->getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), + DV->getType().isUnsignedDIType()); + DV->setDIE(VariableDie); return VariableDie; } else { @@ -1579,7 +1622,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); + addVariableAddress(*DV, VariableDie, Location); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index e1af572..3908b37 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -19,6 +19,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" +#include "llvm/MC/MCExpr.h" namespace llvm { @@ -96,7 +97,7 @@ class CompileUnit { public: CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU); + DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. 
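The DBG_VALUE handling at the top of this hunk now asserts exactly three operands and distinguishes a direct register value from a register-indirect one: when the second operand is an immediate, the value lives in memory at register plus offset. A rough sketch of that classification, using a hypothetical DebugValue stand-in for the MachineInstr operands (the names here are illustrative, not LLVM API):

#include <cstdint>

// Hypothetical, simplified model of a DBG_VALUE's first two operands.
struct DebugValue {
  bool IsReg;      // operand 0 is a register (otherwise some constant form)
  unsigned Reg;    // register number; 0 means "no register"
  bool HasOffset;  // operand 1 is an immediate offset
  int64_t Offset;  // the offset, meaningful when HasOffset is true
};

enum class LocKind { RegisterDirect, RegisterIndirect, Constant, None };

LocKind classifyDebugValue(const DebugValue &DV) {
  if (!DV.IsReg)
    return LocKind::Constant;         // the imm/fpimm/cimm paths above
  if (DV.HasOffset)
    return LocKind::RegisterIndirect; // value is in memory at Reg + Offset
  if (DV.Reg)
    return LocKind::RegisterDirect;   // value is in the register itself
  return LocKind::None;
}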
@@ -149,13 +150,13 @@ public: DIEs.push_back(Die); } void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; DIEs.push_back(Die); } /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + DIE *getDIE(const MDNode *N) const { return MDNodeToDieMap.lookup(N); } DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); @@ -168,12 +169,8 @@ public: /// getDIEEntry - Returns the debug information entry for the specified /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); } /// insertDIEEntry - Insert debug information entry into the map. @@ -217,6 +214,11 @@ public: /// void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + /// addExpr - Add a Dwarf expression attribute data and value. + /// + void addExpr(DIE *Die, unsigned Attribute, unsigned Form, + const MCExpr *Expr); + /// addLabel - Add a Dwarf label attribute data and value. /// void addLabel(DIE *Die, unsigned Attribute, unsigned Form, @@ -230,7 +232,8 @@ public: /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, MCSymbol *Label); + void addOpAddress(DIE *Die, const MCSymbol *Label); + void addOpAddress(DIE *Die, const MCSymbolRefExpr *Label); /// addDelta - Add a label delta attribute data and value. /// @@ -257,16 +260,16 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); + const MachineLocation &Location, bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE *Die, const MachineOperand &MO); + void addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); @@ -282,7 +285,7 @@ public: /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -292,12 +295,13 @@ public: /// starting location. Add the DWARF information to the die. 
Obsolete, /// please use addComplexAddress instead. /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + void addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location); /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7915e2f..979c0c3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ValueHandle.h" @@ -46,54 +47,60 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", - cl::Hidden, - cl::desc("Disable debug info printing")); +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); -static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", - cl::Hidden, cl::init(false), - cl::desc("Generate DWARF pubnames section")); +static cl::opt<bool> +GenerateDwarfPubNamesSection("generate-dwarf-pubnames", cl::Hidden, + cl::init(false), + cl::desc("Generate DWARF pubnames section")); -namespace { - enum DefaultOnOff { - Default, Enable, Disable - }; -} +static cl::opt<bool> +GenerateODRHash("generate-odr-hash", cl::Hidden, + cl::desc("Add an ODR hash to external type DIEs."), + cl::init(false)); -static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); +namespace { +enum DefaultOnOff { + Default, + Enable, + Disable +}; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + 
clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); namespace { - const char *DWARFGroupName = "DWARF Emission"; - const char *DbgTimerName = "DWARF Debug Writer"; + const char *const DWARFGroupName = "DWARF Emission"; + const char *const DbgTimerName = "DWARF Debug Writer"; struct CompareFirst { template <typename T> bool operator()(const T &lhs, const T &rhs) const { @@ -162,6 +169,14 @@ DIType DbgVariable::getType() const { } // end llvm namespace +/// Return Dwarf Version by checking module flags. +static unsigned getDwarfVersionFromModule(const Module *M) { + Value *Val = M->getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast<ConstantInt>(Val)->getZExtValue(); +} + DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), @@ -204,6 +219,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasSplitDwarf = SplitDwarf == Enable ? true : false; + DwarfVersion = getDwarfVersionFromModule(MMI->getModule()); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); @@ -247,22 +264,21 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { - std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym]; - if (Entry.first) return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { + return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); +} - Entry.second = NextAddrPoolNumber++; - Entry.first = Sym; - return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { + std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = + AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); + if (P.second) + ++NextAddrPoolNumber; + return P.first->second; } // Define a unique number for the abbreviation. // void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Profile the node so that we can make it unique. - FoldingSetNodeID ID; - Abbrev.Profile(ID); - // Check the set for priors. DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); @@ -423,11 +439,11 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); if (Ranges.empty()) return 0; - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); + // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. 
emitDIE will handle @@ -435,16 +451,20 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } + + // Terminate the range list. DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } + // Construct the address range for this DIE. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); @@ -463,7 +483,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); assert(Ranges.empty() == false && "LexicalScope does not have instruction markers!"); @@ -477,18 +497,6 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return NULL; } - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - - if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); - } - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && - "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); @@ -500,7 +508,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -508,32 +516,46 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. 
-  // For now, use first instruction range and emit low_pc/high_pc pair and
-  // corresponding .debug_inlined section entry for this pair.
-  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
-    I = InlineInfo.find(InlinedSP);
-
-  if (I == InlineInfo.end()) {
-    InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
-    InlinedSPNodes.push_back(InlinedSP);
-  } else
-    I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
-
+  // Add the call site information to the DIE.
   DILocation DL(Scope->getInlinedAt());
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
                  getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
                                      TheCU->getUniqueID()));
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());

+  // Track the start label for this inlined function. The .debug_inlined
+  // section specification does not clearly state how to emit inlined scopes
+  // that are split into multiple instruction ranges. For now, use the first
+  // instruction range and emit the low_pc/high_pc pair and the corresponding
+  // .debug_inlined section entry for this pair.
+  if (Asm->MAI->doesDwarfUseInlineInfoSection()) {
+    MCSymbol *StartLabel = getLabelBeforeInsn(Ranges.begin()->first);
+    InlineInfoMap::iterator I = InlineInfo.find(InlinedSP);
+
+    if (I == InlineInfo.end()) {
+      InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
+      InlinedSPNodes.push_back(InlinedSP);
+    } else
+      I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+  }
+
   // Add name to the name table; we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
   addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
@@ -566,14 +588,14 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   }

   // Collect lexical scope children first.
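In both the lexical-scope and inlined-scope paths above, a multi-range scope pushes two NULL symbols after its begin/end label pairs. When .debug_ranges is emitted, those become a pair of zero values, which is DWARF's end-of-list marker. A minimal sketch of the resulting layout, assuming a plain (low_pc, high_pc) address-pair model of the section:

#include <cstdint>
#include <utility>
#include <vector>

using AddressRange = std::pair<uint64_t, uint64_t>; // (low_pc, high_pc)

// Terminate a range list the way the NULL,NULL pushes above do.
std::vector<AddressRange> finishRangeList(std::vector<AddressRange> Ranges) {
  Ranges.emplace_back(0, 0); // end-of-list entry: two zero addresses
  return Ranges;
}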
- const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); + const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } - const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); + const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); @@ -601,14 +623,15 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; + ++i) constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } if (!ScopeDIE) return NULL; // Add children - for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); @@ -812,7 +835,8 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, TheCU->getUniqueID()); TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, EntityDie); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, + EntityDie); StringRef Name = Module.getName(); if (!Name.empty()) TheCU->addString(IMDie, dwarf::DW_AT_name, Name); @@ -910,7 +934,7 @@ void DwarfDebug::collectDeadVariables() { for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.Verify()) continue; + if (!SP.isSubprogram()) continue; if (!SP.isDefinition()) continue; DIArray Variables = SP.getVariables(); if (Variables.getNumElements() == 0) continue; @@ -926,10 +950,10 @@ void DwarfDebug::collectDeadVariables() { DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.Verify()) continue; - DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (!DV.isVariable()) continue; + DbgVariable NewVar(DV, NULL); if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) + SPCU->constructVariableDIE(&NewVar, Scope->isAbstractScope())) ScopeDIE->addChild(VariableDIE); } } @@ -938,6 +962,136 @@ void DwarfDebug::collectDeadVariables() { DeleteContainerSeconds(DeadFnScopeMap); } +// Type Signature [7.27] computation code. +typedef ArrayRef<uint8_t> HashValue; + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. Returns "" if the attribute doesn't exist. +static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { + DIEValue *V = Die->findAttribute(Attr); + + if (DIEString *S = dyn_cast_or_null<DIEString>(V)) + return S->getString(); + + return StringRef(""); +} + +/// \brief Adds the string in \p Str to the hash in \p Hash. This also hashes +/// a trailing NULL with the string. 
+static void addStringToHash(MD5 &Hash, StringRef Str) { + DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: These are copied and only slightly modified out of LEB128.h. + +/// \brief Adds the unsigned in \p N to the hash in \p Hash. This also encodes +/// the unsigned as a ULEB128. +static void addULEB128ToHash(MD5 &Hash, uint64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +/// \brief Including \p Parent adds the context of Parent to \p Hash. +static void addParentContextToHash(MD5 &Hash, DIE *Parent) { + + DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<DIE *, 1> Parents; + while (Parent->getTag() != dwarf::DW_TAG_compile_unit) { + Parents.push_back(Parent); + Parent = Parent->getParent(); + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIE *Die = *I; + + // ... Append the letter "C" to the sequence... + addULEB128ToHash(Hash, 'C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128ToHash(Hash, Die->getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addStringToHash(Hash, Name); + } +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE with +/// the exception that we are hashing only the context and the name of the type. +static void addDIEODRSignature(MD5 &Hash, CompileUnit *CU, DIE *Die) { + + // Add the contexts to the hash. We won't be computing the ODR hash for + // function local types so it's safe to use the generic context hashing + // algorithm here. + // FIXME: If we figure out how to account for linkage in some way we could + // actually do this with a slight modification to the parent hash algorithm. + DIE *Parent = Die->getParent(); + if (Parent) + addParentContextToHash(Hash, Parent); + + // Add the current DIE information. + + // Add the DWARF tag of the DIE. + addULEB128ToHash(Hash, Die->getTag()); + + // Add the name of the type to the hash. + addStringToHash(Hash, getDIEStringAttr(Die, dwarf::DW_AT_name)); + + // Now get the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and store those as the attribute. + // Our MD5 implementation always returns its results in little endian, swap + // bytes appropriately. + uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8); + + // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. + CU->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, dwarf::DW_FORM_data8, + Signature); +} + +/// Return true if the current DIE is contained within an anonymous namespace. 
+static bool isContainedInAnonNamespace(DIE *Die) { + DIE *Parent = Die->getParent(); + + while (Parent) { + if (Parent->getTag() == dwarf::DW_TAG_namespace && + getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") + return true; + Parent = Parent->getParent(); + } + + return false; +} + +/// Test if the current CU language is C++ and that we have +/// a named type that is not contained in an anonymous namespace. +static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { + return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && + getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && + !isContainedInAnonNamespace(Die); + } + void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); @@ -953,6 +1107,18 @@ void DwarfDebug::finalizeModuleInfo() { TheCU->constructContainingTypeDIEs(); } + // Split out type units and conditionally add an ODR tag to the split + // out type. + // FIXME: Do type splitting. + for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { + MD5 Hash; + DIE *Die = TypeUnits[i]; + // If we've requested ODR hashes and it's applicable for an ODR hash then + // add the ODR signature now. + if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) + addDIEODRSignature(Hash, CUMap.begin()->second, Die); + } + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) @@ -1074,7 +1240,7 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(), + for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(), E = SkeletonCUs.end(); I != E; ++I) delete *I; @@ -1160,7 +1326,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && - MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; + (MI->getOperand(1).isImm() || + (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. @@ -1170,16 +1337,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - if (MI->getNumOperands() != 3) { - MachineLocation MLoc = Asm->getDebugValueLocation(MI); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); - } - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + assert(MI->getNumOperands() == 3); + if (MI->getOperand(0).isReg()) { MachineLocation MLoc; - // TODO: Currently an offset of 0 in a DBG_VALUE means - // we need to generate a direct register value. - // There is no way to specify an indirect value with offset 0. - if (MI->getOperand(1).getImm() == 0) + // If the second operand is an immediate, this is a + // register-indirect address. + if (!MI->getOperand(1).isImm()) MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); @@ -1200,7 +1363,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { - // collection info from MMI table. + // Grab the variable info that was squirreled away in the MMI side-table. 
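Taken together, the hashing helpers above flatten a type DIE into a byte stream of context letters ('C'), ULEB128-encoded tags, and NUL-terminated names, feed that stream to MD5, and keep the trailing 8 bytes of the digest, read little-endian, as the ODR signature. The two encoding details can be sketched without any LLVM dependencies (the MD5 step itself is elided; this is an illustration, not the committed code):

#include <cstdint>
#include <vector>

// ULEB128-encode Value; the same byte sequence addULEB128ToHash feeds to MD5.
void appendULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // high bit set: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

// Read digest bytes [8, 16) as a little-endian 64-bit value, mirroring
// "*reinterpret_cast<support::ulittle64_t *>(Result + 8)" above.
uint64_t signatureFromDigest(const uint8_t Digest[16]) {
  uint64_t Sig = 0;
  for (unsigned i = 0; i != 8; ++i)
    Sig |= uint64_t(Digest[8 + i]) << (8 * i);
  return Sig;
}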
collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl<const MDNode*>::const_iterator @@ -1293,7 +1456,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !Processed.insert(DV)) + if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) addScopeVariable(Scope, new DbgVariable(DV, NULL)); @@ -1390,19 +1553,19 @@ void DwarfDebug::identifyScopeMarkers() { while (!WorkList.empty()) { LexicalScope *S = WorkList.pop_back_val(); - const SmallVector<LexicalScope *, 4> &Children = S->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector<InsnRange, 4> &Ranges = S->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "InsnRange does not have first instruction!"); assert(RI->second && "InsnRange does not have second instruction!"); @@ -1424,7 +1587,7 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Check for number of operands since the compatibility is // cheap here. if (SP->getNumOperands() > 19) @@ -1492,7 +1655,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. DIVariable DV(Var); - if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(getDISubprogram(DV.getContext())) .describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; @@ -1522,11 +1685,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" << "\t" << *Prev << "\n"); History.pop_back(); - } - else { + } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); - } } } } @@ -1595,7 +1756,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); - else { + else if (PrevMBB != &PrevMBB->getParent()->back()) { // Terminate after LastMI. History.push_back(LastMI); } @@ -1680,12 +1841,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = AList.size(); i != e; ++i) { LexicalScope *AScope = AList[i]; DISubprogram SP(AScope->getScopeNode()); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Collect info for variables that were optimized out. 
DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) continue; // Check that DbgVariable for DV wasn't created earlier, when // findAbstractVariable() was called for inlined instance of DV. @@ -1707,7 +1868,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); // Clear debug info - for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + for (ScopeVariablesMap::iterator I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) DeleteContainerPointers(I->second); ScopeVariables.clear(); @@ -1904,7 +2065,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Addr += Holder.getCUOffset(Origin->getCompileUnit()); } - Asm->EmitInt32(Addr); + Asm->OutStreamer.EmitIntValue(Addr, + Form == dwarf::DW_FORM_ref_addr ? DIEEntry::getRefAddrSize(Asm) : 4); break; } case dwarf::DW_AT_ranges: { @@ -1988,7 +2150,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), ASectionSym); @@ -2229,7 +2391,7 @@ void DwarfDebug::emitDebugPubnames() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DwarfVersion); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), @@ -2251,7 +2413,7 @@ void DwarfDebug::emitDebugPubnames() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); @@ -2276,7 +2438,7 @@ void DwarfDebug::emitDebugPubTypes() { TheCU->getUniqueID())); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(DwarfVersion); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); @@ -2363,24 +2525,18 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. 
- SmallVector<std::pair<unsigned, - std::pair<MCSymbol*, unsigned>* >, 64> Entries; + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator - I = AddressPool.begin(), E = AddressPool.end(); + for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), + E = AddressPool.end(); I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &(I->second))); - - array_pod_sort(Entries.begin(), Entries.end()); + Entries[I->second] = I->first; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - MCSymbol *Sym = Entries[i].second->first; - if (Sym) - Asm->EmitLabelReference(Entries[i].second->first, - Asm->getDataLayout().getPointerSize()); + // Emit an expression for reference from debug information entries. + if (const MCExpr *Expr = Entries[i]) + Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); else Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); } @@ -2393,7 +2549,7 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit visible names into a debug loc section. +// Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; @@ -2422,9 +2578,9 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); - DIVariable DV(Entry.Variable); + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + DIVariable DV(Entry.getVariable()); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2444,17 +2600,18 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); else { // Complex address entry. unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { + if (Loc.getOffset()) { i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); Asm->OutStreamer.AddComment("DW_OP_deref"); Asm->EmitInt8(dwarf::DW_OP_deref); Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); @@ -2463,12 +2620,12 @@ void DwarfDebug::emitDebugLoc() { } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); i = 2; } } else { - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); } // Emit remaining complex address elements. 
@@ -2478,7 +2635,7 @@ void DwarfDebug::emitDebugLoc() {
             Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
             Asm->EmitULEB128(DV.getAddrElement(++i));
           } else if (Element == DIBuilder::OpDeref) {
-            if (!Entry.Loc.isReg())
+            if (!Loc.isReg())
               Asm->EmitInt8(dwarf::DW_OP_deref);
           } else
             llvm_unreachable("unknown Opcode found in complex address");
@@ -2559,7 +2716,7 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));

   Asm->OutStreamer.AddComment("Dwarf Version");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->EmitInt16(DwarfVersion);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
@@ -2567,8 +2724,7 @@ void DwarfDebug::emitDebugInlineInfo() {
          E = InlinedSPNodes.end(); I != E; ++I) {

     const MDNode *Node = *I;
-    DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
-      = InlineInfo.find(Node);
+    InlineInfoMap::iterator II = InlineInfo.find(Node);
     SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
     DISubprogram SP(Node);
     StringRef LName = SP.getLinkageName();
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6cc792b..e14f9b1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -41,7 +40,6 @@ class DIEAbbrev;
 class DIE;
 class DIEBlock;
 class DIEEntry;
-class DwarfDebug;

 //===----------------------------------------------------------------------===//
 /// \brief This class is used to record source line correspondence.
@@ -63,13 +62,12 @@ public:

 /// \brief This struct describes location entries emitted in the .debug_loc
 /// section.
-typedef struct DotDebugLocEntry {
+class DotDebugLocEntry {
+  // Begin and end symbols for the address range over which this location
+  // is valid.
   const MCSymbol *Begin;
   const MCSymbol *End;
-  MachineLocation Loc;
-  const MDNode *Variable;
-  bool Merged;
-  bool Constant;
+
+  // Type of entry that this represents.
   enum EntryType {
     E_Location,
     E_Integer,
@@ -83,23 +81,42 @@ typedef struct DotDebugLocEntry {
     const ConstantFP *CFP;
     const ConstantInt *CIP;
   } Constants;
-  DotDebugLocEntry()
-    : Begin(0), End(0), Variable(0), Merged(false),
-      Constant(false) { Constants.Int = 0;}
+
+  // The location in the machine frame.
+  MachineLocation Loc;
+
+  // The variable to which this location entry corresponds.
+  const MDNode *Variable;
+
+  // Whether this location has been merged.
+  bool Merged;
+
+public:
+  DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {
+    Constants.Int = 0;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
                    const MDNode *V)
-    : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
-      Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+      : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {
+    Constants.Int = 0;
+    EntryKind = E_Location;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.Int = i;
+    EntryKind = E_Integer;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.CFP = FPtr;
+    EntryKind = E_ConstantFP;
+  }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
                    const ConstantInt *IPtr)
-    : Begin(B), End(E), Variable(0), Merged(false),
-      Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+      : Begin(B), End(E), Variable(0), Merged(false) {
+    Constants.CIP = IPtr;
+    EntryKind = E_ConstantInt;
+  }

   /// \brief Empty entries are also used as a trigger to emit a temp label. Such
   /// labels are referenced to find the debug_loc offset for a given DIE.
@@ -115,10 +132,14 @@ typedef struct DotDebugLocEntry {
   bool isInt() const { return EntryKind == E_Integer; }
   bool isConstantFP() const { return EntryKind == E_ConstantFP; }
   bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-  int64_t getInt() { return Constants.Int; }
-  const ConstantFP *getConstantFP() { return Constants.CFP; }
-  const ConstantInt *getConstantInt() { return Constants.CIP; }
-} DotDebugLocEntry;
+  int64_t getInt() const { return Constants.Int; }
+  const ConstantFP *getConstantFP() const { return Constants.CFP; }
+  const ConstantInt *getConstantInt() const { return Constants.CIP; }
+  const MDNode *getVariable() const { return Variable; }
+  const MCSymbol *getBeginSym() const { return Begin; }
+  const MCSymbol *getEndSym() const { return End; }
+  MachineLocation getLoc() const { return Loc; }
+};

 //===----------------------------------------------------------------------===//
 /// \brief This class is used to track local variable information.
@@ -172,15 +193,15 @@ public:
   }

   bool variableHasComplexAddress() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.hasComplexAddress();
   }
   bool isBlockByrefVariable() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.isBlockByrefVariable();
   }
   unsigned getNumAddrElements() const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.getNumAddrElements();
   }
   uint64_t getAddrElement(unsigned i) const {
@@ -189,16 +210,6 @@ public:
   DIType getType() const;
 };

-
-// A String->Symbol mapping of strings used by indirect
-// references.
-typedef StringMap<std::pair<MCSymbol*, unsigned>,
-                  BumpPtrAllocator&> StrPool;
-
-// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
-// references.
-typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
-
 /// \brief Collects and handles information specific to a particular
 /// collection of units.
 class DwarfUnits {
@@ -215,21 +226,28 @@ class DwarfUnits {
   SmallVector<CompileUnit *, 1> CUs;

   // Collection of strings for this unit and assorted symbols.
+  // A String->Symbol mapping of strings used by indirect
+  // references.
+  typedef StringMap<std::pair<MCSymbol*, unsigned>,
+                    BumpPtrAllocator&> StrPool;
   StrPool StringPool;
   unsigned NextStringPoolNumber;
   std::string StringPref;

   // Collection of addresses for this unit and assorted labels.
+  // A Symbol->unsigned mapping of addresses used by indirect
+  // references.
+  typedef DenseMap<const MCExpr *, unsigned> AddrPool;
   AddrPool AddressPool;
   unsigned NextAddrPoolNumber;

 public:
   DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
              std::vector<DIEAbbrev *> *A, const char *Pref,
-             BumpPtrAllocator &DA) :
-    Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
-    StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
-    AddressPool(), NextAddrPoolNumber(0) {}
+             BumpPtrAllocator &DA)
+      : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
+        NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
+        NextAddrPoolNumber(0) {}

   /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -245,14 +263,15 @@ public:

   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
-  void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
-                 const MCSymbol *);
+  void emitUnits(DwarfDebug *DD, const MCSection *USection,
+                 const MCSection *ASection, const MCSymbol *ASectionSym);

   /// \brief Emit all of the strings to the section given.
-  void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+  void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection,
+                   const MCSymbol *StrSecSym);

   /// \brief Emit all of the addresses to the section given.
-  void emitAddresses(const MCSection *);
+  void emitAddresses(const MCSection *AddrSection);

   /// \brief Returns the entry into the start of the pool.
   MCSymbol *getStringPoolSym();
@@ -270,7 +289,8 @@ public:

   /// \brief Returns the index into the address pool with the given
   /// label/symbol.
-  unsigned getAddrPoolIndex(MCSymbol *);
+  unsigned getAddrPoolIndex(const MCExpr *Sym);
+  unsigned getAddrPoolIndex(const MCSymbol *Sym);

   /// \brief Returns the address pool.
   AddrPool *getAddrPool() { return &AddressPool; }
@@ -291,10 +311,7 @@ class DwarfDebug {
   // All DIEValues are allocated through this allocator.
   BumpPtrAllocator DIEValueAllocator;

-  //===--------------------------------------------------------------------===//
-  // Attribute used to construct specific Dwarf sections.
-  //
-
+  // Handle to the compile unit used for the inline extension handling.
   CompileUnit *FirstCU;

   // Maps MDNode with its corresponding CompileUnit.
@@ -318,7 +335,7 @@ class DwarfDebug {
   // Provides a unique id per text section.
   SetVector<const MCSection*> SectionMap;

-  // List of Arguments (DbgValues) for current function.
+  // List of arguments for the current function.
   SmallVector<DbgVariable *, 8> CurrentFnArguments;

   LexicalScopes LScopes;
@@ -327,7 +344,9 @@ class DwarfDebug {
   DenseMap<const MDNode *, DIE *> AbstractSPDies;

   // Collection of dbg variables of a scope.
- DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; + typedef DenseMap<LexicalScope *, + SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; @@ -342,7 +361,9 @@ class DwarfDebug { // Keep track of inlined functions and their location. This // information is used to populate the debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; + typedef DenseMap<const MDNode *, + SmallVector<InlineInfoLabels, 4> > InlineInfoMap; + InlineInfoMap InlineInfo; SmallVector<const MDNode *, 4> InlinedSPNodes; // This is a collection of subprogram MDNodes that are processed to @@ -387,7 +408,7 @@ class DwarfDebug { MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_at_comp_dir. + // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; // Counter for assigning globally unique IDs for CUs. @@ -402,10 +423,20 @@ class DwarfDebug { // Whether or not we're emitting info for older versions of gdb on darwin. bool IsDarwinGDBCompat; + // Holder for imported entities. + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + + // Holder for types that are going to be extracted out into a type unit. + std::vector<DIE *> TypeUnits; + // DWARF5 Experimental Options bool HasDwarfAccelTables; bool HasSplitDwarf; + unsigned DwarfVersion; + // Separated Dwarf Variables // In general these will all be for bits that are left in the // original object file, rather than things that are meant @@ -423,10 +454,6 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; - typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; - private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -627,6 +654,10 @@ public: /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); + /// \brief Add a DIE to the set of types that we're going to pull into + /// type units. + void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. @@ -649,6 +680,9 @@ public: /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } }; } // End of namespace llvm diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 92a5bb7..b48b817 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,18 +26,20 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLoweringBase *TLI; + const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + public: - BasicTTI() : ImmutablePass(ID), TLI(0) { + BasicTTI() : ImmutablePass(ID), TM(0) { llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,8 @@ public: return this; } + virtual bool hasBranchDivergence() const; + /// \name Scalar TTI Implementations /// @{ @@ -106,7 +110,7 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty) const; + virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; /// @} }; @@ -118,17 +122,18 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { - return new BasicTTI(TLI); +llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { + return new BasicTTI(TM); } +bool BasicTTI::hasBranchDivergence() const { return false; } bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); + return getTLI()->isLegalAddImmediate(imm); } bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); + return getTLI()->isLegalICmpImmediate(imm); } bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -139,7 +144,7 @@ bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); + return getTLI()->isLegalAddressingMode(AM, Ty); } int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -150,27 +155,28 @@ int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->getScalingFactorCost(AM, Ty); + return getTLI()->getScalingFactorCost(AM, Ty); } bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); + return getTLI()->isTruncateFree(Ty1, Ty2); } bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); + EVT T = getTLI()->getValueType(Ty); + return getTLI()->isTypeLegal(T); } unsigned BasicTTI::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); + return getTLI()->getJumpBufAlignment(); } unsigned BasicTTI::getJumpBufSize() const { - return TLI->getJumpBufSize(); + return getTLI()->getJumpBufSize(); } bool BasicTTI::shouldBuildLookupTables() const { + const TargetLoweringBase *TLI = getTLI(); return TLI->supportJumpTables() && (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); @@ -213,6 +219,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind) const { // Check if any of the operands are vector operands. 
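// Editor's aside (illustration only, not part of this patch): the cost hooks
// in this file price an IR operation by first asking how its type legalizes.
// getTypeLegalizationCost() returns a (#parts, legal MVT) pair; for example,
// on an assumed target whose widest legal vector is v4i32, a v8i32 operation
// legalizes to two v4i32 pieces:
//
//   std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Ty);
//   // LT.first == 2, LT.second == MVT::v4i32 on that target
//
// and LT.first is the factor by which the per-part cost is then scaled.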
+ const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -259,6 +266,7 @@ unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -352,6 +360,7 @@ unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -396,7 +405,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); // Assume that all loads of legal types cost 1. return LT.first; @@ -437,12 +446,18 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return 0; } + const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { @@ -476,10 +491,10 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); return LT.first; } -unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { +unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index bf0ea5a..9cd4208 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -406,7 +406,8 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) return 0; @@ -414,7 +415,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -647,6 +648,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. 
Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -676,7 +678,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -784,7 +791,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index df795df..26bdca9 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -100,13 +100,15 @@ namespace llvm { void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex); diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 38ae17d..b03c325 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +34,7 @@ INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); + au.addRequired<MachineBlockFrequencyInfo>(); au.addRequired<MachineLoopInfo>(); au.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(au); @@ -45,7 +47,8 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) @@ -107,12 +110,12 @@ static bool 
isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +void +VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; - unsigned loopDepth = 0; bool isExiting = false; float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; @@ -140,14 +143,14 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (mi->getParent() != mbb) { mbb = mi->getParent(); loop = Loops.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } // Calculate instr weight. bool reads, writes; tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + weight = LiveIntervals::getSpillWeight( + writes, reads, MBFI.getBlockFreq(mi->getParent())); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 75f4b96..fcfc9dc 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -24,7 +24,7 @@ using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index d4955b3..18c8e0a 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -361,7 +361,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid) + SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -388,7 +388,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. bool Forbidden = false; - for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), ite = Forbid.end(); it != ite; ++it) if (TRI->regsOverlap(NewReg, *it)) { Forbidden = true; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index df13dd3..565d20b 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -103,7 +103,7 @@ class TargetRegisterInfo; unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid); + SmallVectorImpl<unsigned> &Forbid); }; } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 05ac58a..c7c1752 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -32,7 +32,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { - const TargetLoweringBase *TLI; + const TargetMachine *TM; // RewindFunction - _Unwind_Resume or the target equivalent. 
Constant *RewindFunction; @@ -42,8 +42,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetLoweringBase *TLI) : - FunctionPass(ID), TLI(TLI), RewindFunction(0) { + DwarfEHPrepare(const TargetMachine *TM) : + FunctionPass(ID), TM(TM), RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } @@ -59,8 +59,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetLoweringBase *TLI) { - return new DwarfEHPrepare(TLI); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); } /// GetExceptionObject - Return the exception object from the value passed into @@ -117,6 +117,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Find the rewind function if we didn't already. + const TargetLowering *TLI = TM->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 562a610..e277f5c 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -573,7 +573,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector<LiveReg, 4> Regs; - for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -583,7 +583,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // Sorted insertion. bool Inserted = false; - for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; @@ -614,7 +614,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i].Value == Latest) kill(*i); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index f4485d5..1ae7e3b 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -720,9 +720,9 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; - // If it is already predicated, check if its predicate subsumes the new - // predicate. - if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + // If it is already predicated, check if the new predicate subsumes + // its predicate. 
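// Editor's aside (not part of this patch): TargetInstrInfo documents
// SubsumesPredicate(A, B) as returning true when the first predicate
// subsumes the second, so the argument order carries the whole meaning
// of this check:
//
//   TII->SubsumesPredicate(BBI.Predicate, Pred);  // deleted form: existing
//                                                 //   subsumes incoming
//   TII->SubsumesPredicate(Pred, BBI.Predicate);  // added form: incoming
//                                                 //   subsumes existing
//
// Only the added form matches the reworded comment above.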
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; if (BBI.BrCond.size()) { diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 3bdc30d..8910652 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -22,8 +22,10 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -64,6 +66,7 @@ class InlineSpiller : public Spiller { MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; // Variables that are valid during spill(), but used by multiple methods. LiveRangeEdit *Edit; @@ -147,7 +150,8 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()) {} + TRI(*mf.getTarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} void spill(LiveRangeEdit &); @@ -1050,6 +1054,34 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (MO->readsReg()) { + assert(RI.Reads && "Cannot fold physreg reader"); + continue; + } + if (RI.Defines) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = LI->getVNInfoAt(Idx)) + LI->removeValNo(VNI); + } + } + } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); @@ -1120,18 +1152,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else { - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - MI->eraseFromParent(); - } + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); continue; } @@ -1294,5 +1322,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, Loops); + Edit->calculateRegClassAndHint(MF, Loops, MBFI); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index f647fd6..6c9b2e5 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -83,7 +83,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, } void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -115,7 +115,6 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), &TM->getTargetLowering()->getObjFileLowering()); PM.add(MMI); - MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*TM)); @@ -134,7 +133,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->setInitialized(); - return Context; + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -164,6 +163,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; @@ -171,16 +171,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - *getInstrInfo(), - Context->getRegisterInfo(), STI); + MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; MCAsmBackend *MAB = 0; if (ShowMCEncoding) { - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, - *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); } @@ -198,8 +195,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, + *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 8172154..ffe407a 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -212,15 +212,15 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); + assert (Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; while (!WorkStack.empty()) { LexicalScope *WS = WorkStack.back(); - const SmallVector<LexicalScope *, 4> &Children = WS->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { @@ -279,8 +279,8 @@ getMachineBasicBlocks(DebugLoc DL, return; } - SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); - for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), E = InsnRanges.end(); I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0b117ac..85bed46 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -108,6 +108,7 @@ class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. @@ -134,9 +135,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) + : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), + next(0), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -299,7 +301,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, unsigned Offset, + bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. 
UserValue *lookupVirtReg(unsigned VirtReg); @@ -414,7 +417,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - DebugLoc DL) { + bool IsIndirect, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); @@ -424,7 +427,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, return UV; } - UserValue *UV = new UserValue(Var, Offset, DL, allocator); + UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); userValues.push_back(UV); Leader = UserValue::merge(Leader, UV); return UV; @@ -445,15 +448,17 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI->getNumOperands() != 3 || - !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || + !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); return false; } // Get or create the UserValue for (variable,offset). - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); - UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -921,19 +926,12 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; - // Frame index locations may require a target callback. - if (Loc.isFI()) { - MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), - Loc.getIndex(), offset, variable, - findDebugLoc()); - if (MI) { - MBB->insert(I, MI); - return; - } - } - // This is not a frame index, or the target is happy with a standard FI. 
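// Editor's sketch (not part of this patch): per the operand test in
// handleDebugValue() above, a DBG_VALUE now comes in two shapes, told apart
// by its second operand:
//
//   DBG_VALUE <loc>, <reg>, !var   ; direct: the location holds the value
//   DBG_VALUE <loc>, <imm>, !var   ; indirect: value lives at [loc + imm]
//
// The replacement code below uses the BuildMI() overload that takes the
// IsIndirect flag for register locations, and falls back to the generic
// operand/offset form for everything else.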
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + if (Loc.isReg()) + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, variable); + else + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -992,4 +990,3 @@ void LiveDebugVariables::dump() { static_cast<LDVImpl*>(pImpl)->print(dbgs()); } #endif - diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 861e99b..6be6bf3 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -415,7 +415,7 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo, + SmallVectorImpl<VNInfo *> &NewVNInfo, MachineRegisterInfo *MRI) { verify(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 1ca2d46..3680943 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,14 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); @@ -115,6 +124,12 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { computeRegMasks(); computeLiveInRegUnits(); + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } DEBUG(dump()); return true; } @@ -605,21 +620,9 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. - if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). 
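// Editor's aside (not part of this patch): plugging numbers into the deleted
// expression below shows the taper the old comment describes: the factor
// (1 + 100/(d+10))^d is ~6.6e2 at depth d=3 (same order as 10^3) but only
// ~6.7e33 at d=200, instead of an unrepresentable 10^200. The replacement
// drops the depth heuristic entirely and scales each def/use by the block's
// frequency relative to the function entry:
//
//   weight = (isDef + isUse) * freq.getFrequency()
//                            / BlockFrequency::getEntryFrequency();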
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; +LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { + const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); + return (isDef + isUse) * (freq.getFrequency() * Scale); } LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 7793e96..792ef54 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -216,108 +215,122 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled) { - SetVector<LiveInterval*, - SmallVector<LiveInterval*, 8>, - SmallPtrSet<LiveInterval*, 8> > ToShrink; +/// Find all live intervals that need to shrink, then remove the instruction. +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - for (;;) { - // Erase all dead defs. - while (!Dead.empty()) { - MachineInstr *MI = Dead.pop_back_val(); - assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - - // Never delete inline asm. - if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); - continue; - } + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } - // Use the same criteria as DeadMachineInstructionElim. - bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); - continue; - } + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); - - // Collect virtual registers to be erased after MI is gone. - SmallVector<unsigned, 8> RegsToErase; - bool ReadsPhysRegs = false; - - // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) - continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) - ReadsPhysRegs = true; - continue; - } - LiveInterval &LI = LIS.getInterval(Reg); - - // Shrink read registers, unless it is likely to be expensive and - // unlikely to change anything. We typically don't want to shrink the - // PIC base register that has lots of uses everywhere. - // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) - ToShrink.insert(&LI); - - // Remove defined value. 
- if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) { + for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); + Units.isValid(); ++Units) { + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LI->getVNInfoAt(Idx)) + LI->removeValNo(VNI); } } } - - // Currently, we don't support DCE of physreg live ranges. If MI reads - // any unreserved physregs, don't erase the instruction, but turn it into - // a KILL instead. This way, the physreg live ranges don't end up - // dangling. - // FIXME: It would be better to have something like shrinkToUses() for - // physregs. That could potentially enable more DCE and it would free up - // the physreg. It would not happen often, though. - if (ReadsPhysRegs) { - MI->setDesc(TII.get(TargetOpcode::KILL)); - // Remove all operands that aren't physregs. - for (unsigned i = MI->getNumOperands(); i; --i) { - const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - MI->RemoveOperand(i-1); - } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); - } else { + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) + RegsToErase.push_back(Reg); } + } + } - // Erase any virtregs that are now empty and unused. There may be <undef> - // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; - if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { - ToShrink.remove(&LIS.getInterval(Reg)); - eraseVirtReg(Reg); - } - } + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. 
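// Editor's sketch (not part of this patch): the KILL rewrite below keeps only
// the physical-register operands, so a dead def that still reads a physreg,
// e.g.
//
//   %vreg5<def,dead> = COPY %EAX
//
// is turned into
//
//   KILL %EAX
//
// which keeps %EAX's live range terminated at this point instead of deleting
// its last use outright.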
+ if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; @@ -331,7 +344,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; - + // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling @@ -343,7 +356,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, break; } } - + if (BeingSpilled) continue; // LI may have been separated, create new intervals. @@ -374,9 +387,11 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } -void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops) { - VirtRegAuxInfo VRAI(MF, LIS, Loops); +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 4e83fc8..ed55d7a 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -441,7 +441,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -484,7 +484,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); Defs.pop_back(); @@ -609,9 +609,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. 
if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 78e9950..5633271 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -51,7 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -341,6 +342,38 @@ bool MachineBasicBlock::isLiveIn(unsigned Reg) const { return I != livein_end(); } +unsigned +MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { + assert(getParent() && "MBB must be inserted in function"); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(RC && "Register class is required"); + assert((isLandingPad() || this == &getParent()->front()) && + "Only the entry block and landing pads can have physreg live ins"); + + bool LiveIn = isLiveIn(PhysReg); + iterator I = SkipPHIsAndLabels(begin()), E = end(); + MachineRegisterInfo &MRI = getParent()->getRegInfo(); + const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo(); + + // Look for an existing copy. + if (LiveIn) + for (;I != E && I->isCopy(); ++I) + if (I->getOperand(1).getReg() == PhysReg) { + unsigned VirtReg = I->getOperand(0).getReg(); + if (!MRI.constrainRegClass(VirtReg, RC)) + llvm_unreachable("Incompatible live-in register class."); + return VirtReg; + } + + // No luck, create a virtual register. + unsigned VirtReg = MRI.createVirtualRegister(RC); + BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) + .addReg(PhysReg, RegState::Kill); + if (!LiveIn) + addLiveIn(PhysReg); + return VirtReg; +} + void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { getParent()->splice(NewAfter, this); } diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 070daf2..e269d24 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -50,11 +50,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { return false; } -/// getblockFreq - Return block frequency. Return 0 if we don't have the -/// information. Please note that initial frequency is equal to 1024. It means -/// that we should not rely on the value itself, but only on the comparison to -/// the other block frequencies. We do this to avoid using of floating points. 
-/// BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 61d8d38..d228286 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,11 +84,11 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -193,7 +193,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -244,7 +244,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 04321f3..0703df0 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,23 +55,28 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, GCModuleInfo* gmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(TM); else RegInfo = 0; + MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), - TM.Options.RealignStack); + FrameInfo = + new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getStackAlignment(AttributeSet::FunctionIndex)); - ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + + ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -456,11 +462,15 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { + return TM.getFrameLowering(); +} + /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!TFI.isStackRealignable() || !RealignOption) - assert(Align <= TFI.getStackAlignment() && + if (!getFrameLowering()->isStackRealignable() || !RealignOption) + assert(Align <= getFrameLowering()->getStackAlignment() && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -482,8 +492,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -498,8 +510,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -513,8 +527,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -532,10 +548,12 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. 
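// Editor's aside (not part of this patch): MinAlign(A, B) evaluates to the
// lowest set bit of (A | B), i.e. the largest power of two dividing both.
// For the example in the comment above, MinAlign(32, 16) == 16, so the fixed
// object inherits the full stack alignment; an object at SPOffset 40 on the
// same 16-byte-aligned stack would only get MinAlign(40, 16) == 8.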
- unsigned StackAlign = TFI.getStackAlignment(); + unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -769,6 +787,10 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } +const DataLayout *MachineConstantPool::getDataLayout() const { + return TM.getDataLayout(); +} + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -850,7 +872,8 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, + getDataLayout())) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; @@ -887,7 +910,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(const Value*)Constants[i].Val.ConstVal; + WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 32d0668..06bb80a 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1253,32 +1253,6 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return true; } -/// isSafeToReMat - Return true if it's safe to rematerialize the specified -/// instruction which defined the specified register instead of copying it. -bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - AliasAnalysis *AA, - unsigned DstReg) const { - bool SawStore = false; - if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, AA, SawStore)) - return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg()) - continue; - // FIXME: For now, do not remat any instruction with register operands. - // Later on, we can loosen the restriction is the register operands have - // not been modified between the def and use. Note, this is different from - // MachineSink because the code is no longer in two-address form (at least - // partially). - if (MO.isUse()) - return false; - else if (!MO.isDead() && MO.getReg() != DstReg) - return false; - } - return true; -} - /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. Return false if it is known to have no ordered @@ -1411,8 +1385,10 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. 
DIScope Scope(DL.getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) + if (Scope) CommentOS << Scope.getFilename(); else CommentOS << "<unknown>"; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index ed3ed4d..6ad4e39 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -172,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates); + SmallVectorImpl<CandidateInfo> &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -404,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates) { + SmallVectorImpl<CandidateInfo> &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -1084,7 +1084,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, return true; for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1130,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector<unsigned, 8> &RP = BackTrace[i]; + SmallVectorImpl<unsigned> &RP = BackTrace[i]; for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index a916bd6..bb54284 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -253,13 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), - Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + : ImmutablePass(ID), Context(0, 0, 0) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 68372f6..7f2c0ca 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,16 +19,18 @@ using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true), TracksLiveness(true) { +MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) + : TM(TM), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(TRI.getNumRegUnits()); - UsedPhysRegMask.resize(TRI.getNumRegs()); + UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); + UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. 
- PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; - memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); + PhysRegUseDefLists = + new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; + memset(PhysRegUseDefLists, 0, + sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); } MachineRegisterInfo::~MachineRegisterInfo() { @@ -50,7 +52,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) @@ -63,7 +66,8 @@ bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); // Stop early if there is no room to grow. if (NewRC == OldRC) @@ -73,14 +77,16 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + I->getRegClassConstraint(I.getOperandNo(), TII, + getTargetRegisterInfo()); if (unsigned SubIdx = I.getOperand().getSubReg()) { if (OpRC) - NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, + SubIdx); else - NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); } else if (OpRC) - NewRC = TRI->getCommonSubClass(NewRC, OpRC); + NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; } @@ -126,24 +132,28 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &I.getOperand(); MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; } @@ -156,7 +166,7 @@ void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) verifyUseList(TargetRegisterInfo::index2VirtReg(i)); - for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + for 
(unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif } @@ -390,8 +400,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { #endif void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = TRI->getReservedRegs(MF); - assert(ReservedRegs.size() == TRI->getNumRegs() && + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -401,7 +411,8 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index bb6aad7..17f0af8 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -77,7 +77,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { if (BB->empty()) return 0; diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index fff6b2b..a6c5a9f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -30,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include <queue> using namespace llvm; @@ -51,11 +53,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// FIXME: remove this flag after initial testing. It should always be a good -// thing. 
-static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, - cl::desc("Constrain vreg copies."), cl::init(true)); - static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -207,7 +204,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); @@ -297,7 +294,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -309,7 +306,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << " " << Name << ": "; + dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -467,7 +464,7 @@ void ScheduleDAGMI::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.getPressure().dump(TRI)); + DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -479,6 +476,13 @@ void ScheduleDAGMI::initRegPressure() { TopRPTracker.closeTop(); BotRPTracker.closeBottom(); + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + // Account for liveness generated by the region boundary. if (LiveRegionEnd != RegionEnd) BotRPTracker.recede(); @@ -491,12 +495,13 @@ void ScheduleDAGMI::initRegPressure() { const std::vector<unsigned> &RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << "Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); - if (RegionPressure[i] > Limit) + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); RegionCriticalPSets.push_back(PressureElement(i, 0)); + } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) @@ -517,7 +522,7 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { } DEBUG( for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); if (NewMaxPressure[i] > Limit ) { dbgs() << " " << TRI->getRegPressureSetName(i) << ": " << NewMaxPressure[i] << " > " << Limit << "\n"; @@ -581,7 +586,8 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers. void ScheduleDAGMI::buildDAGWithRegPressure() { // Initialize the register pressure tracker used by buildSchedGraph. 
-  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+                 /*TrackUntiedDefs=*/true);
 
   // Account for liveness generated by the region boundary.
   if (LiveRegionEnd != RegionEnd)
@@ -1019,6 +1025,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
                                 GlobalSegment->start)) {
     return;
   }
+  // If the prior global segment may be defined by the same two-address
+  // instruction that also defines LocalLI, then we can't make a hole here.
+  if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
+                             LocalLI->beginIndex())) {
+    return;
+  }
   // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
   // it would be a disconnected component in the live range.
   assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
@@ -1101,7 +1113,7 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
 }
 
 //===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -1112,10 +1124,9 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
+    NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
-    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
-    NodeOrder};
+    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
@@ -1160,6 +1171,9 @@ public:
     // The reason for this candidate.
     CandReason Reason;
 
+    // Set of reasons that apply to multiple candidates.
+    uint32_t RepeatReasonSet;
+
     // Register pressure values for the best candidate.
     RegPressureDelta RPDelta;
 
@@ -1167,7 +1181,7 @@ public:
     SchedResourceDelta ResDelta;
 
     SchedCandidate(const CandPolicy &policy)
-      : Policy(policy), SU(NULL), Reason(NoCand) {}
+      : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
 
     bool isValid() const { return SU; }
 
@@ -1180,6 +1194,9 @@ public:
       ResDelta = Best.ResDelta;
     }
 
+    bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
+    void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
+
     void initResourceDelta(const ScheduleDAGMI *DAG,
                            const TargetSchedModel *SchedModel);
   };
@@ -1189,32 +1206,21 @@ public:
     // Critical path through the DAG in expected latency.
    unsigned CriticalPath;
 
+    // Scaled count of micro-ops left to schedule.
+    unsigned RemIssueCount;
+
    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
-    // Critical resource for the unscheduled zone.
-    unsigned CritResIdx;
-    // Number of micro-ops left to schedule.
- unsigned RemainingMicroOps; void reset() { CriticalPath = 0; + RemIssueCount = 0; RemainingCounts.clear(); - CritResIdx = 0; - RemainingMicroOps = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - - unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { - if (!SchedModel->hasInstrSchedModel()) - return 0; - - return std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); - } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1235,8 +1241,13 @@ public: ScheduleHazardRecognizer *HazardRec; + /// Number of cycles it takes to issue the instructions scheduled in this + /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. + /// See getStalls(). unsigned CurrCycle; - unsigned IssueCount; + + /// Micro-ops issued in the current cycle + unsigned CurrMOps; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; @@ -1244,20 +1255,35 @@ public: // The expected latency of the critical path in this scheduled zone. unsigned ExpectedLatency; - // Resources used in the scheduled zone beyond this boundary. - SmallVector<unsigned, 16> ResourceCounts; + // The latency of dependence chains leading into this zone. + // For each node scheduled bottom-up: DLat = max DLat, N.Depth. + // For each cycle scheduled: DLat -= 1. + unsigned DependentLatency; + + /// Count the scheduled (issued) micro-ops that can be retired by + /// time=CurrCycle assuming the first scheduled instr is retired at time=0. + unsigned RetiredMOps; + + // Count scheduled resources that have been executed. Resources are + // considered executed if they become ready in the time that it takes to + // saturate any resource including the one in question. Counts are scaled + // for direct comparison with other resources. Counts can be compared with + // MOps * getMicroOpFactor and Latency * getLatencyFactor. + SmallVector<unsigned, 16> ExecutedResCounts; + + /// Cache the max count for a single resource. + unsigned MaxExecutedResCount; // Cache the critical resources ID in this scheduled zone. - unsigned CritResIdx; + unsigned ZoneCritResIdx; // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; - unsigned ExpectedCount; - #ifndef NDEBUG - // Remember the greatest min operand latency. - unsigned MaxMinLatency; + // Remember the greatest operand latency as an upper bound on the number of + // times we should retry the pending queue because of a hazard. + unsigned MaxObservedLatency; #endif void reset() { @@ -1270,19 +1296,20 @@ public: NextSUs.clear(); HazardRec = 0; CurrCycle = 0; - IssueCount = 0; + CurrMOps = 0; MinReadyCycle = UINT_MAX; ExpectedLatency = 0; - ResourceCounts.resize(1); - assert(!ResourceCounts[0] && "nonzero count for bad resource"); - CritResIdx = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; IsResourceLimited = false; - ExpectedCount = 0; #ifndef NDEBUG - MaxMinLatency = 0; + MaxObservedLatency = 0; #endif // Reserve a zero-count for invalid CritResIdx. 
- ResourceCounts.resize(1); + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); } /// Pending queues extend the ready queues with the same ID and the @@ -1303,25 +1330,60 @@ public: return Available.getID() == ConvergingScheduler::TopQID; } +#ifndef NDEBUG + const char *getResourceName(unsigned PIdx) { + if (!PIdx) + return "MOps"; + return SchedModel->getProcResource(PIdx)->Name; + } +#endif + + /// Get the number of latency cycles "covered" by the scheduled + /// instructions. This is the larger of the critical path within the zone + /// and the number of cycles required to issue the instructions. + unsigned getScheduledLatency() const { + return std::max(ExpectedLatency, CurrCycle); + } + unsigned getUnscheduledLatency(SUnit *SU) const { - if (isTop()) - return SU->getHeight(); - return SU->getDepth() + SU->Latency; + return isTop() ? SU->getHeight() : SU->getDepth(); + } + + unsigned getResourceCount(unsigned ResIdx) const { + return ExecutedResCounts[ResIdx]; } + /// Get the scaled count of scheduled micro-ops and resources, including + /// executed resources. unsigned getCriticalCount() const { - return ResourceCounts[CritResIdx]; + if (!ZoneCritResIdx) + return RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(ZoneCritResIdx); + } + + /// Get a scaled count for the minimum execution time of the scheduled + /// micro-ops that are ready to execute by getExecutedCount. Notice the + /// feedback loop. + unsigned getExecutedCount() const { + return std::max(CurrCycle * SchedModel->getLatencyFactor(), + MaxExecutedResCount); } bool checkHazard(SUnit *SU); - void setLatencyPolicy(CandPolicy &Policy); + unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); + + unsigned getOtherResourceCount(unsigned &OtherCritIdx); + + void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); void releaseNode(SUnit *SU, unsigned ReadyCycle); - void bumpCycle(); + void bumpCycle(unsigned NextCycle); + + void incExecutedResources(unsigned PIdx, unsigned Count); - void countResource(unsigned PIdx, unsigned Cycles); + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); @@ -1330,6 +1392,10 @@ public: void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + +#ifndef NDEBUG + void dumpScheduledState(); +#endif }; private: @@ -1366,15 +1432,6 @@ public: virtual void registerRoots(); protected: - void balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand); - - void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand); - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, @@ -1404,7 +1461,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { for (std::vector<SUnit>::iterator I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -1413,13 +1471,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } 
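
The scaled counts used above (getCriticalCount, ExecutedResCounts, RemIssueCount) make micro-ops and heterogeneous resource cycles directly comparable: each kind is multiplied by a factor chosen so that saturating it for one cycle yields the same number of units, and dividing a scaled count by the latency factor converts back to cycles. A standalone toy of that arithmetic, with invented machine parameters rather than any real scheduling model:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Invented machine: 4-wide issue, two ALU pipes, one load/store pipe.
      // LCM(4, 2, 1) = 4 scaled units per fully used cycle.
      const unsigned LCM           = 4;
      const unsigned MicroOpFactor = LCM / 4; // 1 unit per micro-op
      const unsigned ALUFactor     = LCM / 2; // 2 units per ALU cycle
      const unsigned LSFactor      = LCM / 1; // 4 units per LS cycle

      // Suppose a zone issued 12 micro-ops, 8 ALU cycles, and 5 LS cycles.
      unsigned MOps = 12 * MicroOpFactor; // 12 units = 3 cycles at width 4
      unsigned ALU  = 8 * ALUFactor;      // 16 units = 4 cycles on 2 pipes
      unsigned LS   = 5 * LSFactor;       // 20 units = 5 cycles on 1 pipe

      // The largest scaled count identifies the critical resource; dividing
      // by the latency factor (here equal to LCM) converts back to cycles.
      unsigned Crit = std::max(MOps, std::max(ALU, LS));
      std::printf("critical count %u = %u cycles\n", Crit, Crit / LCM);
      return 0;
    }
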
} - for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; - } - } } void ConvergingScheduler::SchedBoundary:: @@ -1429,7 +1480,7 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) - ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); } void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { @@ -1460,13 +1511,15 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); + Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -1482,12 +1535,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { if (I->isWeak()) continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); + Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; } Bot.releaseNode(SU, SU->BotReadyCycle); } @@ -1521,7 +1574,7 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; @@ -1529,45 +1582,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// Compute the remaining latency to determine whether ILP should be increased. -void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { - // FIXME: compile time. In all, we visit four queues here one we should only - // need to visit the one that was last popped if we cache the result. +// Find the unscheduled node in ReadySUs with the highest latency. 
+unsigned ConvergingScheduler::SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+  SUnit *LateSU = 0;
   unsigned RemLatency = 0;
-  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
-    DEBUG(dbgs() << "  " << Available.getName()
-          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
-    if (L > RemLatency)
+    if (L > RemLatency) {
       RemLatency = L;
+      LateSU = *I;
+    }
   }
-  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
-       I != E; ++I) {
-    unsigned L = getUnscheduledLatency(*I);
-    if (L > RemLatency)
-      RemLatency = L;
+  if (LateSU) {
+    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+          << LateSU->NodeNum << ") " << RemLatency << "c\n");
+  }
+  return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
+unsigned ConvergingScheduler::SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+  OtherCritIdx = 0;
+  if (!SchedModel->hasInstrSchedModel())
+    return 0;
+
+  unsigned OtherCritCount = Rem->RemIssueCount
+    + (RetiredMOps * SchedModel->getMicroOpFactor());
+  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
+        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+       PIdx != PEnd; ++PIdx) {
+    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+    if (OtherCount > OtherCritCount) {
+      OtherCritCount = OtherCount;
+      OtherCritIdx = PIdx;
+    }
+  }
+  if (OtherCritIdx) {
+    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
+          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+          << " " << getResourceName(OtherCritIdx) << "\n");
+  }
+  return OtherCritCount;
+}
+
+/// Set the CandPolicy for this zone given the current resources and latencies
+/// inside and outside the zone.
+void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
+                                                   SchedBoundary &OtherZone) {
+  // Now that potential stalls have been considered, apply preemptive
+  // heuristics based on the total latency and resources inside and outside
+  // this zone.
+
+  // Compute remaining latency. We need this both to determine whether the
+  // overall schedule has become latency-limited and whether the instructions
+  // outside this zone are resource or latency limited.
+  //
+  // The "dependent" latency is updated incrementally during scheduling as the
+  // max height/depth of scheduled nodes minus the cycles since it was
+  // scheduled:
+  //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
+  //
+  // The "independent" latency is the max ready queue depth:
+  //   ILat = max N.depth for N in Available|Pending
+  //
+  // RemainingLatency is the greater of independent and dependent latency.
+  unsigned RemLatency = DependentLatency;
+  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
+  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
+
+  // Compute the critical resource outside the zone.
+ unsigned OtherCritIdx; + unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; } - unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); - DEBUG(dbgs() << " " << Available.getName() - << " ExpectedLatency " << ExpectedLatency - << " CP Limit " << CriticalPathLimit << '\n'); - if (RemLatency + ExpectedLatency >= CriticalPathLimit - && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { - Policy.ReduceLatency = true; - DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); + if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " + << RemLatency << " + " << CurrCycle << "c > CritPath " + << Rem->CriticalPath << "\n"); } + // If the same resource is limiting inside and outside the zone, do nothing. + if (ZoneCritResIdx == OtherCritIdx) + return; + + DEBUG( + if (IsResourceLimited) { + dbgs() << " " << Available.getName() << " ResourceLimited: " + << getResourceName(ZoneCritResIdx) << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; + if (!IsResourceLimited && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (IsResourceLimited && !Policy.ReduceResIdx) + Policy.ReduceResIdx = ZoneCritResIdx; + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; } void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { - if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -1577,16 +1710,21 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - unsigned NextCycle = CurrCycle + 1; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) { - IssueCount = 0; - NextCycle = MinReadyCycle; - } +void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. 
@@ -1602,34 +1740,50 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
     }
   }
   CheckPending = true;
-  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+  unsigned LFactor = SchedModel->getLatencyFactor();
+  IsResourceLimited =
+    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+    > (int)LFactor;
+
+  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+}
 
-  DEBUG(dbgs() << "  " << Available.getName()
-        << " Cycle: " << CurrCycle << '\n');
+void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
+                                                              unsigned Count) {
+  ExecutedResCounts[PIdx] += Count;
+  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+    MaxExecutedResCount = ExecutedResCounts[PIdx];
 }
 
 /// Add the given processor resource to this scheduled zone.
-void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
-                                                       unsigned Cycles) {
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
+unsigned ConvergingScheduler::SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
   unsigned Factor = SchedModel->getResourceFactor(PIdx);
-  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
-        << " +(" << Cycles << "x" << Factor
-        << ") / " << SchedModel->getLatencyFactor() << '\n');
-  unsigned Count = Factor * Cycles;
-  ResourceCounts[PIdx] += Count;
+  unsigned Count = Factor * Cycles;
+  DEBUG(dbgs() << "  " << getResourceName(PIdx)
+        << " +" << Cycles << "x" << Factor << "u\n");
+
+  // Update Executed resources counts.
+  incExecutedResources(PIdx, Count);
   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
   Rem->RemainingCounts[PIdx] -= Count;
 
-  // Check if this resource exceeds the current critical resource by a full
-  // cycle. If so, it becomes the critical resource.
-  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
-      >= (int)SchedModel->getLatencyFactor()) {
-    CritResIdx = PIdx;
+  // Check if this resource exceeds the current critical resource. If so, it
+  // becomes the critical resource.
+  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
+    ZoneCritResIdx = PIdx;
     DEBUG(dbgs() << "  *** Critical resource "
-          << SchedModel->getProcResource(PIdx)->Name << " x"
-          << ResourceCounts[PIdx] << '\n');
+          << getResourceName(PIdx) << ": "
+          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   }
+  // TODO: We don't yet model reserved resources. It's not hard though.
+  return CurrCycle;
 }
 
 /// Move the boundary of scheduled code by one SUnit.
@@ -1643,40 +1797,96 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+  CurrMOps += IncMOps;
+
+  // checkHazard prevents scheduling multiple instructions per cycle that exceed
+  // issue width. However, we commonly reach the maximum. In this case
+  // opportunistically bump the cycle to avoid uselessly checking everything in
+  // the readyQ. Furthermore, a single instruction may produce more than one
+  // cycle's worth of micro-ops.
+  //
+  // TODO: Also check if this SU must end a dispatch group.
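
The IsResourceLimited computation in bumpCycle above compares the two sides of that scaling: the critical resource count against the scheduled latency multiplied by the latency factor, with a margin of one full cycle of units before resources are declared the bottleneck. A sketch of just that comparison, with invented numbers:

    #include <cstdio>

    // Resource limited only when the critical scaled resource count exceeds
    // the scaled latency by more than one cycle's worth of units.
    static bool isResourceLimited(unsigned CritCount, unsigned LatencyCycles,
                                  unsigned LFactor) {
      return (int)(CritCount - LatencyCycles * LFactor) > (int)LFactor;
    }

    int main() {
      const unsigned LFactor = 4; // scaled units per cycle (invented)
      // 24 units vs. 4 cycles: 24 - 16 = 8 > 4, so resource limited.
      std::printf("%d\n", isResourceLimited(24, 4, LFactor));
      // 18 units vs. 4 cycles: 18 - 16 = 2 <= 4, so latency limited.
      std::printf("%d\n", isResourceLimited(18, 4, LFactor));
      return 0;
    }
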
+ unsigned NextCycle = CurrCycle; + if (CurrMOps >= SchedModel->getIssueWidth()) { + ++NextCycle; + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". + break; + } + RetiredMOps += IncMOps; + // Update resource counts and critical resource. if (SchedModel->hasInstrSchedModel()) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - countResource(PI->ProcResourceIdx, PI->Cycles); + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; } } - if (isTop()) { - if (SU->getDepth() > ExpectedLatency) - ExpectedLatency = SU->getDepth(); + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); } - else { - if (SU->getHeight() > ExpectedLatency) - ExpectedLatency = SU->getHeight(); + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } - - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - if (IssueCount >= SchedModel->getIssueWidth()) { - DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); - bumpCycle(); + // If we stall for any reason, bump the cycle. 
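
The switch on getMicroOpBufferSize above distinguishes three machine models: 0 is fully in-order (the pending queue already held the instruction until its ready cycle), 1 issues in order but stalls in place until operands are ready, and larger values model a buffer that hides the wait, so scheduled micro-ops are simply treated as retired. A toy next-cycle computation following the same convention, with invented values rather than the real TargetSchedModel API:

    #include <algorithm>
    #include <cstdio>

    static unsigned issueCycle(unsigned BufferSize, unsigned CurrCycle,
                               unsigned ReadyCycle) {
      switch (BufferSize) {
      case 0:  return CurrCycle;                       // held back until ready
      case 1:  return std::max(CurrCycle, ReadyCycle); // explicit stall
      default: return CurrCycle;                       // buffered, no stall
      }
    }

    int main() {
      std::printf("one-deep buffer: %u\n", issueCycle(1, 10, 13)); // 13
      std::printf("out-of-order:    %u\n", issueCycle(8, 10, 13)); // 10
      return 0;
    }
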
+  if (NextCycle > CurrCycle) {
+    bumpCycle(NextCycle);
+  }
+  else {
+    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+    // resource limited. If a stall occurred, bumpCycle does this.
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    IsResourceLimited =
+      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+      > (int)LFactor;
+  }
+  DEBUG(dumpScheduledState());
 }
 
 /// Release pending ready nodes into the available queue. This makes them
@@ -1688,6 +1898,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
 
   // Check to see if any of the pending instructions are ready to issue. If
   // so, add them to the available queue.
+  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
     SUnit *SU = *(Pending.begin()+i);
     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
@@ -1695,7 +1906,7 @@
     if (ReadyCycle < MinReadyCycle)
       MinReadyCycle = ReadyCycle;
 
-    if (ReadyCycle > CurrCycle)
+    if (!IsBuffered && ReadyCycle > CurrCycle)
       continue;
 
     if (checkHazard(SU))
@@ -1726,7 +1937,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   if (CheckPending)
     releasePending();
 
-  if (IssueCount > 0) {
+  if (CurrMOps > 0) {
     // Defer any ready instrs that now have a hazard.
     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
       if (checkHazard(*I)) {
@@ -1738,9 +1949,9 @@
     }
   }
   for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
            "permanent hazard"); (void)i;
-    bumpCycle();
+    bumpCycle(CurrCycle + 1);
     releasePending();
   }
   if (Available.size() == 1)
@@ -1748,104 +1959,31 @@
   return NULL;
 }
 
-/// Record the candidate policy for opposite zones with different critical
-/// resources.
-///
-/// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
-void ConvergingScheduler::balanceZones(
-  ConvergingScheduler::SchedBoundary &CriticalZone,
-  ConvergingScheduler::SchedCandidate &CriticalCand,
-  ConvergingScheduler::SchedBoundary &OppositeZone,
-  ConvergingScheduler::SchedCandidate &OppositeCand) {
-
-  if (!CriticalZone.IsResourceLimited)
-    return;
-  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
-
-  SchedRemainder *Rem = CriticalZone.Rem;
-
-  // If the critical zone is overconsuming a resource relative to the
-  // remainder, try to reduce it.
-  unsigned RemainingCritCount =
-    Rem->RemainingCounts[CriticalZone.CritResIdx];
-  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
-      > (int)SchedModel->getLatencyFactor()) {
-    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
-          << " reduce "
-          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
-          << '\n');
-  }
-  // If the other zone is underconsuming a resource relative to the full zone,
-  // try to increase it.
- unsigned OppositeCount = - OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; - if ((int)(OppositeZone.ExpectedCount - OppositeCount) - > (int)SchedModel->getLatencyFactor()) { - OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() - << " demand " - << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name - << '\n'); - } -} - -/// Determine if the scheduled zones exceed resource limits or critical path and -/// set each candidate's ReduceHeight policy accordingly. -void ConvergingScheduler::checkResourceLimits( - ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand) { - - // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BotCand.Policy); - Top.setLatencyPolicy(TopCand.Policy); - - // Handle resource-limited regions. - if (Top.IsResourceLimited && Bot.IsResourceLimited - && Top.CritResIdx == Bot.CritResIdx) { - // If the scheduled critical resource in both zones is no longer the - // critical remaining resource, attempt to reduce resource height both ways. - if (Top.CritResIdx != Rem.CritResIdx) { - TopCand.Policy.ReduceResIdx = Top.CritResIdx; - BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << " Reduce scheduled " - << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); - } - return; - } - // Handle latency-limited regions. - if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { - // If the total scheduled expected latency exceeds the region's critical - // path then reduce latency both ways. - // - // Just because a zone is not resource limited does not mean it is latency - // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle - // to exceed expected latency. - if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) - && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { - TopCand.Policy.ReduceLatency = true; - BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency - << " + " << Bot.ExpectedLatency << '\n'); - } - return; +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. +void ConvergingScheduler::SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); } - // The critical resource is different in each zone, so request balancing. - - // Compute the cost of each zone. 
- Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); - Top.ExpectedCount = std::max( - Top.getCriticalCount(), - Top.ExpectedCount * SchedModel->getLatencyFactor()); - Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); - Bot.ExpectedCount = std::max( - Bot.getCriticalCount(), - Bot.ExpectedCount * SchedModel->getLatencyFactor()); - - balanceZones(Top, TopCand, Bot, BotCand); - balanceZones(Bot, BotCand, Top, TopCand); + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; } +#endif void ConvergingScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, @@ -1864,6 +2002,7 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } + /// Return true if this heuristic determines order. static bool tryLess(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, @@ -1878,6 +2017,7 @@ static bool tryLess(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } @@ -1894,9 +2034,34 @@ static bool tryGreater(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } +static bool tryPressure(const PressureElement &TryP, + const PressureElement &CandP, + ConvergingScheduler::SchedCandidate &TryCand, + ConvergingScheduler::SchedCandidate &Cand, + ConvergingScheduler::CandReason Reason) { + // If both candidates affect the same set, go with the smallest increase. + if (TryP.PSetID == CandP.PSetID) { + return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand, + Reason)) { + return true; + } + // If TryP has lower Rank, it has a higher priority. + int TryRank = TryP.PSetRank(); + int CandRank = CandP.PSetRank(); + // If the candidates are decreasing pressure, reverse priority. + if (TryP.UnitIncrease < 0) + std::swap(TryRank, CandRank); + return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); +} + static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -1962,20 +2127,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. - if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, - Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + // Avoid exceeding the target's limit. If signed PSetID is negative, it is + // invalid; convert it to INT_MAX to give it lowest priority. + if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess)) return; - if (Cand.Reason == SingleExcess) - Cand.Reason = MultiPressure; // Avoid increasing the max critical pressure in the scheduled region. 
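
tryLess, tryGreater, and tryPressure above all follow one pattern: heuristics run in decreasing priority order, the first one that distinguishes the two candidates decides the comparison and records its reason, and exact ties fall through to the next, weaker heuristic. A much-reduced standalone model of that cascade, with enum values and pressure fields invented for illustration:

    #include <cstdio>

    enum Reason { NoCand, RegExcess, RegCritical, NodeOrder };

    struct Candidate {
      unsigned Node;     // original DAG order
      int ExcessDelta;   // pressure above the target's limit
      int CriticalDelta; // pressure above the region's critical sets
      Reason Why;
    };

    // Prefer the smaller value; true means this heuristic decided the order.
    static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                        Candidate &Cand, Reason R) {
      if (TryVal < CandVal) { TryCand.Why = R; return true; }
      if (TryVal > CandVal) { if (Cand.Why > R) Cand.Why = R; return true; }
      return false; // tie: defer to the next heuristic
    }

    // Returns true if TryCand should replace Cand.
    static bool tryCandidate(Candidate &Cand, Candidate &TryCand) {
      if (tryLess(TryCand.ExcessDelta, Cand.ExcessDelta, TryCand, Cand,
                  RegExcess))
        return TryCand.Why == RegExcess;
      if (tryLess(TryCand.CriticalDelta, Cand.CriticalDelta, TryCand, Cand,
                  RegCritical))
        return TryCand.Why == RegCritical;
      TryCand.Why = NodeOrder; // all else equal: keep original order
      return TryCand.Node < Cand.Node;
    }

    int main() {
      Candidate A = { 0, 2, 1, NoCand };
      Candidate B = { 1, 2, 0, NoCand }; // ties on excess, wins on critical
      std::printf("prefer B: %d, reason %d\n", tryCandidate(A, B), (int)B.Why);
      return 0;
    }
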
- if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, - Cand.RPDelta.CriticalMax.UnitIncrease, - TryCand, Cand, SingleCritical)) + if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) return; - if (Cand.Reason == SingleCritical) - Cand.Reason = MultiPressure; // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. @@ -1990,17 +2151,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Weak edges are for clustering and other constraints. - // - // Deferring TryCand here does not change Cand's reason. This is good in the - // sense that a bad candidate shouldn't affect a previous candidate's - // goodness, but bad in that it is assymetric and depends on queue order. - CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand, Weak)) { - Cand.Reason = OrigReason; return; } + // Avoid increasing the max pressure of the entire region. + if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) + return; + // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -2014,8 +2174,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency) { if (Zone.isTop()) { - if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand, TopDepthReduce)) return; @@ -2025,8 +2184,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; } else { - if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand, BotHeightReduce)) return; @@ -2037,16 +2195,9 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } - // Avoid increasing the max pressure of the entire region. - if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, - Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) - return; - if (Cand.Reason == SingleMax) - Cand.Reason = MultiPressure; - // Prefer immediate defs/users of the last scheduled instruction. This is a - // nice pressure avoidance strategy that also conserves the processor's - // register renaming resources and keeps the machine code readable. + // local pressure avoidance strategy that also makes the machine code + // readable. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2058,49 +2209,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } -/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is -/// more desirable than RHS from scheduling standpoint. -static bool compareRPDelta(const RegPressureDelta &LHS, - const RegPressureDelta &RHS) { - // Compare each component of pressure in decreasing order of importance - // without checking if any are valid. Invalid PressureElements are assumed to - // have UnitIncrease==0, so are neutral. - - // Avoid increasing the max critical pressure in the scheduled region. 
- if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << " RP excess top - bot: " - << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); - return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - } - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << " RP critical top - bot: " - << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) - << '\n'); - return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - } - // Avoid increasing the max pressure of the entire region. - if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << " RP current top - bot: " - << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) - << '\n'); - return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - } - return false; -} - #ifndef NDEBUG const char *ConvergingScheduler::getReasonStr( ConvergingScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; - case SingleExcess: return "REG-EXCESS"; - case SingleCritical: return "REG-CRIT "; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; case Weak: return "WEAK "; - case SingleMax: return "REG-MAX "; - case MultiPressure: return "REG-MULTI "; + case RegMax: return "REG-MAX "; case ResourceReduce: return "RES-REDUCE"; case ResourceDemand: return "RES-DEMAND"; case TopDepthReduce: return "TOP-DEPTH "; @@ -2120,13 +2239,13 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { switch (Cand.Reason) { default: break; - case SingleExcess: + case RegExcess: P = Cand.RPDelta.Excess; break; - case SingleCritical: + case RegCritical: P = Cand.RPDelta.CriticalMax; break; - case SingleMax: + case RegMax: P = Cand.RPDelta.CurrentMax; break; case ResourceReduce: @@ -2208,18 +2327,19 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - checkResourceLimits(TopCand, BotCand); + Bot.setPolicy(BotCand.Policy, Top); + Top.setPolicy(TopCand.Policy, Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2232,7 +2352,10 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. 
-  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
+  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
+      || (BotCand.Reason == RegCritical
+          && !BotCand.isRepeat(RegCritical)))
+  {
     IsTopNode = false;
     tracePick(BotCand, IsTopNode);
     return BotCand.SU;
@@ -2241,30 +2364,13 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
 
-  // If either Q has a single candidate that minimizes pressure above the
-  // original region's pressure pick it.
-  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
-    if (TopCand.Reason < BotCand.Reason) {
-      IsTopNode = true;
-      tracePick(TopCand, IsTopNode);
-      return TopCand.SU;
-    }
-    IsTopNode = false;
-    tracePick(BotCand, IsTopNode);
-    return BotCand.SU;
-  }
-  // Check for a salient pressure difference and pick the best from either side.
-  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
-    IsTopNode = true;
-    tracePick(TopCand, IsTopNode);
-    return TopCand.SU;
-  }
-  // Otherwise prefer the bottom candidate, in node order if all else failed.
+  // Choose the queue with the most important (lowest enum) reason.
   if (TopCand.Reason < BotCand.Reason) {
     IsTopNode = true;
     tracePick(TopCand, IsTopNode);
     return TopCand.SU;
   }
+  // Otherwise prefer the bottom candidate, in node order if all else failed.
   IsTopNode = false;
   tracePick(BotCand, IsTopNode);
   return BotCand.SU;
@@ -2348,13 +2454,13 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
 /// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
-    SU->TopReadyCycle = Top.CurrCycle;
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
       reschedulePhysRegCopies(SU, true);
   }
   else {
-    SU->BotReadyCycle = Bot.CurrCycle;
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
     Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
       reschedulePhysRegCopies(SU, false);
@@ -2372,8 +2478,7 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
-  if (EnableCopyConstrain)
-    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
+  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 4dafbe5..dacdbdd 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -394,7 +394,7 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
 /// collectDebugValues - Scan instructions following MI and collect any
 /// matching DBG_VALUEs.
 static void collectDebugValues(MachineInstr *MI,
-                               SmallVector<MachineInstr *, 2> & DbgValues) {
+                               SmallVectorImpl<MachineInstr *> &DbgValues) {
   DbgValues.clear();
   if (!MI->getOperand(0).isReg())
     return;
@@ -537,8 +537,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
   // We give successors with smaller loop depth higher priority.
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); - for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), - E = Succs.end(); SI != E; ++SI) { + for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, @@ -697,7 +697,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { ++MachineBasicBlock::iterator(MI)); // Move debug values. - for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 00f702c..6aa3f67 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -853,8 +853,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, - /* FindMin = */ false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -902,8 +901,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. DepHeight += SchedModel - .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, - /* FindMin = */ false); + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -941,7 +939,7 @@ static bool pushDepHeight(const DataDep &Dep, // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, false); + UseMI, Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -1171,7 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { // Add latency if DefMI is a real instruction. Transients get latency 0. 
if (!Dep.DefMI->isTransient()) DepCycle += TE.MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); return DepCycle; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index a7f8112..e74bfc8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -227,6 +228,8 @@ namespace { void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); void verifyLiveIntervalSegment(const LiveInterval&, LiveInterval::const_iterator); + + void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -268,8 +271,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, - raw_fd_ostream::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -475,6 +477,8 @@ void MachineVerifier::visitMachineFunctionBefore() { // Check that the register use lists are sane. MRI->verifyUseLists(); + + verifyStackFrame(); } // Does iterator point to a and b as the first two elements? @@ -1606,3 +1610,130 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } } } + +namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single + // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the + // value is zero. + // We use a bool plus an integer to capture the stack state. + struct StackStateOfBB { + StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), + ExitIsSetup(false) { } + StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : + EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), + ExitIsSetup(ExitSetup) { } + // Can be negative, which means we are setting up a frame. + int EntryValue; + int ExitValue; + bool EntryIsSetup; + bool ExitIsSetup; + }; +} + +/// Make sure on every path through the CFG, a FrameSetup <n> is always followed +/// by a FrameDestroy <n>, stack adjustments are identical on all +/// CFG edges to a merge point, and frame is destroyed at end of a return block. +void MachineVerifier::verifyStackFrame() { + int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + + SmallVector<StackStateOfBB, 8> SPState; + SPState.resize(MF->getNumBlockIDs()); + SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + + // Visit the MBBs in DFS order. + for (df_ext_iterator<const MachineFunction*, + SmallPtrSet<const MachineBasicBlock*, 8> > + DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); + DFI != DFE; ++DFI) { + const MachineBasicBlock *MBB = *DFI; + + StackStateOfBB BBState; + // Check the exit state of the DFS stack predecessor. 
+ if (DFI.getPathLength() >= 2) { + const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue; + BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup; + BBState.ExitValue = BBState.EntryValue; + BBState.ExitIsSetup = BBState.EntryIsSetup; + } + + // Update stack state by checking contents of MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getOpcode() == FrameSetupOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (BBState.ExitIsSetup) + report("FrameSetup is after another FrameSetup", I); + BBState.ExitValue -= Size; + BBState.ExitIsSetup = true; + } + + if (I->getOpcode() == FrameDestroyOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (!BBState.ExitIsSetup) + report("FrameDestroy is not after a FrameSetup", I); + int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : + BBState.ExitValue; + if (BBState.ExitIsSetup && AbsSPAdj != Size) { + report("FrameDestroy <n> is after FrameSetup <m>", I); + *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; + } + BBState.ExitValue += Size; + BBState.ExitIsSetup = false; + } + } + SPState[MBB->getNumber()] = BBState; + + // Make sure the exit state of any predecessor is consistent with the entry + // state. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || + SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + report("The exit stack state of a predecessor is inconsistent.", MBB); + *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + << SPState[(*I)->getNumber()].ExitValue << ", " + << SPState[(*I)->getNumber()].ExitIsSetup + << "), while BB#" << MBB->getNumber() << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + } + } + + // Make sure the entry state of any successor is consistent with the exit + // state. + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || + SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + report("The entry stack state of a successor is inconsistent.", MBB); + *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + << SPState[(*I)->getNumber()].EntryValue << ", " + << SPState[(*I)->getNumber()].EntryIsSetup + << "), while BB#" << MBB->getNumber() << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + } + } + + // Make sure a basic block with return ends with zero stack adjustment. 
+ if (!MBB->empty() && MBB->back().isReturn()) { + if (BBState.ExitIsSetup) + report("A return block ends with a FrameSetup.", MBB); + if (BBState.ExitValue) + report("A return block ends with a nonzero stack adjustment.", MBB); + } + } +} diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 5584708..bf23eca 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -66,7 +66,7 @@ namespace { /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); void LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + MachineBasicBlock::iterator LastPHIIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -185,10 +185,11 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); + MachineBasicBlock::iterator LastPHIIt = + prior(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) - LowerPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, LastPHIIt); return true; } @@ -218,8 +219,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, /// LowerPHINode - Lower the PHI node at the top of the specified block, /// void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { + MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; + + MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 1a6b62b..c0861c5 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -300,6 +300,8 @@ void TargetPassConfig::addPass(Pass *P) { if (Started && !Stopped) PM->add(P); + else + delete P; if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -331,7 +333,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { addPass(P); // Ends the lifetime of P. // Add the passes after the pass P if there is any. - for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator + for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { @@ -396,15 +398,15 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + addPass(createSjLjEHPreparePass(TM)); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::Win64: - addPass(createDwarfEHPass(TM->getTargetLowering())); + addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM->getTargetLowering())); + addPass(createLowerInvokePass(TM)); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -416,13 +418,13 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. 
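
Returning to MachineVerifier's verifyStackFrame, now complete above: a standalone sketch of the invariant it enforces, with a toy CFG standing in for MachineFunction. Each block inherits its entry state from its DFS predecessor, adjusts it for FrameSetup/FrameDestroy, and every edge must connect matching exit and entry states; the names and the adjustment encoding below are illustrative.

    #include <cstdio>
    #include <vector>

    struct Block {
      std::vector<int> Succs;
      std::vector<int> Adjust; // +N = FrameSetup <N>, -N = FrameDestroy <N>
    };

    static bool verifyStack(const std::vector<Block> &CFG) {
      std::vector<int> Entry(CFG.size(), 0), Exit(CFG.size(), 0);
      std::vector<bool> Seen(CFG.size(), false);
      std::vector<int> Stack(1, 0);
      Seen[0] = true;
      while (!Stack.empty()) {
        int B = Stack.back();
        Stack.pop_back();
        int State = Entry[B];
        for (unsigned i = 0; i < CFG[B].Adjust.size(); ++i)
          State -= CFG[B].Adjust[i]; // setup lowers SP, destroy restores it
        Exit[B] = State;
        for (unsigned i = 0; i < CFG[B].Succs.size(); ++i) {
          int S = CFG[B].Succs[i];
          if (!Seen[S]) {
            Seen[S] = true;
            Entry[S] = State; // the first edge in defines the entry state
            Stack.push_back(S);
          } else if (Entry[S] != State) {
            std::fprintf(stderr, "inconsistent stack state on edge %d -> %d\n",
                         B, S);
            return false;
          }
        }
      }
      return true;
    }

    int main() {
      // Diamond CFG: block 1 destroys the frame, block 2 forgets to, so the
      // two paths reach the join block 3 with different adjustments.
      std::vector<Block> CFG(4);
      CFG[0].Succs.push_back(1); CFG[0].Succs.push_back(2);
      CFG[0].Adjust.push_back(16);  // FrameSetup <16>
      CFG[1].Succs.push_back(3);
      CFG[1].Adjust.push_back(-16); // FrameDestroy <16>
      CFG[2].Succs.push_back(3);
      return verifyStack(CFG) ? 0 : 1; // reports the mismatch, exits 1
    }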
void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(TM)); } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(TM)); addPreISel(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 4b301d8..1965188 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/InlineAsm.h" @@ -227,7 +228,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (F.getRegInfo().isPhysRegUsed(Reg)) { + // Functions which call __builtin_unwind_init get all their registers saved. + if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } @@ -556,14 +558,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the - // frame pointer if a frame pointer is required. + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && - !RegInfo->needsStackRealignment(Fn)) { + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -643,12 +649,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || - !RegInfo->useFPForScavengingIndex(Fn))) { + if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -723,6 +728,40 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + // Store SPAdj at exit of a basic block. + SmallVector<int, 8> SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + // Iterate over the reachable blocks in DFS order. 
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); @@ -733,89 +772,85 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { -#ifndef NDEBUG - int SPAdjCount = 0; // frame setup / destroy count. -#endif - int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { -#ifndef NDEBUG - // Track whether we see even pairs of them - SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; -#endif - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; - - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); - TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. - else - I = llvm::next(PrevI); - continue; - } + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (!MI->getOperand(i).isFI()) - continue; + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. 
We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - MI = 0; - break; + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. + if (MI->isDebugValue()) { + assert(i == 0 && "Frame indices can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; } - if (DoIncr && I != BB->end()) ++I; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } - // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + MI = 0; + break; } - // If we have evenly matched pairs of frame setup / destroy instructions, - // make sure the adjustments come out to zero. If we don't have matched - // pairs, we can't be sure the missing bit isn't in another basic block - // due to a custom inserter playing tricks, so just asserting SPAdj==0 - // isn't sufficient. See tMOVCC on Thumb1, for example. - assert((SPAdjCount || SPAdj == 0) && - "Unbalanced call frame setup / destroy pairs?"); + if (DoIncr && I != BB->end()) ++I; + + // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 87fff9a..50f4daf 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -112,13 +112,13 @@ namespace llvm { bool calcAvailInOut(MachineBasicBlock* MBB); void calculateAnticAvail(MachineFunction &Fn); bool addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks); - bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks); + SmallVectorImpl<MachineBasicBlock *> &blks); + bool addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks); bool calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevSpills); bool calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevRestores); void placeSpillsAndRestores(MachineFunction &Fn); void placeCSRSpillsAndRestores(MachineFunction &Fn); @@ -127,6 +127,8 @@ namespace llvm { void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index c035590..df3e12a 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -43,7 +43,7 @@ static cl::opt<bool, true> VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char *RegAllocBase::TimerGroupName = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 064e40f..ccaabba 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -93,7 +93,7 @@ protected: SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. - static const char *TimerGroupName; + static const char TimerGroupName[]; public: /// VerifyEnabled - True when -verify-regalloc is given. 
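
Backing up to the replaceFrameIndices hunk above: DBG_VALUEs are now rewritten target-independently by folding the frame object's offset into the existing offset operand and replacing the frame index with the base register that getFrameIndexReference selects. A standalone sketch of that fold with invented operand types and an invented frame layout; this is not the real MachineOperand API.

    #include <cassert>
    #include <cstdio>

    enum Kind { FrameIndex, Register, Immediate };
    struct Operand { Kind K; long Val; };

    static const int FrameBaseReg = 6; // stand-in for the target's frame base

    // Resolve a frame index to an offset from some base register.
    static long frameIndexReference(long FI, int &Reg) {
      Reg = FrameBaseReg;
      return -8 * (FI + 1); // toy downward-growing frame layout
    }

    // DBG_VALUE <fi>, <off>, <md>  becomes  DBG_VALUE <reg>, <off + fiOff>, <md>
    static void rewriteDebugValue(Operand Ops[2]) {
      assert(Ops[0].K == FrameIndex && "frame index must be the first operand");
      int Reg;
      Ops[1].Val += frameIndexReference(Ops[0].Val, Reg);
      Ops[0].K = Register;
      Ops[0].Val = Reg;
    }

    int main() {
      Operand Ops[2] = { {FrameIndex, 2}, {Immediate, 4} };
      rewriteDebugValue(Ops);
      std::printf("reg %ld, offset %ld\n", Ops[0].Val, Ops[1].Val); // reg 6, -20
      return 0;
    }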
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 7fcfe9e..d6a7d6f 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -145,6 +146,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index bb9c05c..6617e50 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -293,29 +293,26 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector<MachineInstr *, 4> &LRIDbgValues = + SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = - DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); - int64_t Offset = 0; - if (DBG->getOperand(1).isImm()) - Offset = DBG->getOperand(1).getImm(); + const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + bool IsIndirect = DBG->getOperand(1).isImm(); // Register-indirect value? + uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL; if (MI == MBB->end()) { // If MI is at basic block end then use last instruction's location. MachineBasicBlock::iterator EI = MI; DL = (--EI)->getDebugLoc(); - } - else + } else DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { - MachineBasicBlock *MBB = DBG->getParent(); - MBB->insert(MI, NewDV); - DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); - } + MachineBasicBlock *MBB = DBG->getParent(); + MachineInstr *NewDV = + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + (void)NewDV; + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } // Now that this register is spilled, there should not be any DBG_VALUE // pointing to this register because they are all pointing to the spilled value @@ -859,25 +856,21 @@ void RAFast::AllocateBasicBlock() { } else { // Modify DBG_VALUE now that the value is in a spill slot. - int64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << - "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - // Scan NewDV operands from the beginning.
- MI = NewDV; - ScanDbgValue = true; - break; - } else { - // We can't allocate a physreg for a DebugValue; sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, + TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; } } LiveDbgValueMap[Reg].push_back(MI); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 4974828..f9e363b 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -71,6 +72,7 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -158,6 +160,8 @@ class RAGreedy : public MachineFunctionPass, EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + bool isMax() const { return BrokenHints == ~0u; } + bool operator<(const EvictionCost &O) const { if (BrokenHints != O.BrokenHints) return BrokenHints < O.BrokenHints; @@ -249,14 +253,15 @@ private: void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); - float calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, float&); + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); - float calcGlobalSplitCost(GlobalSplitCandidate&); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); + unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, @@ -320,6 +325,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); @@ -407,15 +414,28 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - // Everything is allocated in long->short order. Long ranges that don't fit - // should be spilled (or split) ASAP so they don't create interference. - Prio = (1u << 31) + Size; + if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + LIS->intervalIsInOneMBB(*LI)) { + // Allocate original local ranges in linear instruction order. Since they + // are singly defined, this produces optimal coloring in the absence of + // global interference and other constraints. 
+ Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + } + else { + // Allocate global and split ranges in long->short order. Long ranges that + // don't fit should be spilled (or split) ASAP so they don't create + // interference. Mark a bit to prioritize global above local ranges. + Prio = (1u << 29) + Size; + } + // Mark a higher bit to prioritize global and local above RS_Split. + Prio |= (1u << 31); // Boost ranges that have a physical register hint. if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } - + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. Queue.push(std::make_pair(Prio, ~Reg)); } @@ -476,6 +496,31 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// +unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + unsigned PhysReg; + while ((PhysReg = Order.next())) { + if (PhysReg == PrevReg) + continue; + + MCRegUnitIterator Units(PhysReg, TRI); + for (; Units.isValid(); ++Units) { + // Instantiate a "subquery", not to be confused with the Queries array. + LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); + if (subQ.checkInterference()) + break; + } + // If no units have interference, break out with the current PhysReg. + if (!Units.isValid()) + break; + } + if (PhysReg) + DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << '\n'); + return PhysReg; +} + /// shouldEvict - determine if A should evict the assigned live range B. The /// eviction policy defined by this function together with the allocation order /// defined by enqueue() decides which registers ultimately end up being split @@ -516,6 +561,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; + bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -569,8 +616,17 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; + if (Urgent) + continue; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + !canReassign(*Intf, PhysReg)) { + return false; + } // Finally, apply the eviction policy for non-urgent evictions. - if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; } } @@ -699,12 +755,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// that all preferences in SplitConstraints are met. /// Return false if there are no bundles with positive bias. bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, - float &Cost) { + BlockFrequency &Cost) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. 
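
The enqueue change above packs the whole allocation policy into one unsigned priority: bit 31 lifts ranges past RS_Split, bit 30 boosts ranges with a known register hint, bit 29 ranks global above local ranges, and the low bits carry the size or instruction distance. A sketch of that encoding; the helper is invented, and it masks the low field for safety, which the patch itself does not need to do:

    #include <cstdio>

    static unsigned makePrio(bool PastSplit, bool HasHint, bool IsGlobal,
                             unsigned SizeOrDistance) {
      unsigned Prio = SizeOrDistance & ((1u << 29) - 1); // low 29 bits
      if (IsGlobal)  Prio |= 1u << 29; // global above local
      if (HasHint)   Prio |= 1u << 30; // hinted above unhinted
      if (PastSplit) Prio |= 1u << 31; // everything above RS_Split ranges
      return Prio;
    }

    int main() {
      // A hinted local range still outranks an unhinted global one.
      unsigned HintedLocal    = makePrio(true, true,  false, 100);
      unsigned UnhintedGlobal = makePrio(true, false, true,  100);
      std::printf("%d\n", HintedLocal > UnhintedGlobal); // prints 1
      return 0;
    }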
SplitConstraints.resize(UseBlocks.size()); - float StaticCost = 0; + BlockFrequency StaticCost = 0; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; @@ -742,8 +798,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } // Accumulate the total frequency of inserted spill code. - if (Ins) - StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + StaticCost += SpillPlacer->getBlockFrequency(BC.Number); } Cost = StaticCost; @@ -876,7 +932,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { SpillPlacer->prepare(Cand.LiveBundles); // The static split cost will be zero since Cand.Intf reports no interference. - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << ", none.\n"); return false; @@ -901,8 +957,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. -float RAGreedy::calcSpillCost() { - float Cost = 0; +BlockFrequency RAGreedy::calcSpillCost() { + BlockFrequency Cost = 0; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -921,8 +977,8 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { - float GlobalCost = 0; +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -936,8 +992,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); - if (Ins) - GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { @@ -949,8 +1005,10 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (RegIn && RegOut) { // We need double spill code if this block has interference. Cand.Intf.moveToBlock(Number); - if (Cand.Intf.hasInterference()) - GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + if (Cand.Intf.hasInterference()) { + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } continue; } // live-in / stack-out or stack-in live-out. @@ -1115,7 +1173,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { unsigned NumCands = 0; unsigned BestCand = NoCand; - float BestCost; + BlockFrequency BestCost; SmallVector<unsigned, 8> UsedCands; // Check if we can split this live range around a compact region. @@ -1123,11 +1181,11 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (HasCompact) { // Yes, keep GlobalCand[0] as the compact region candidate. 
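
In the hunks above, float spill costs become BlockFrequency, and `Ins * frequency` becomes a `while (Ins--)` chain of additions because the fixed-point type supplies saturating addition rather than scalar multiplication. A minimal sketch of such a saturating accumulator, assuming nothing from the real llvm/Support/BlockFrequency.h:

    #include <cstdint>
    #include <cstdio>

    struct Freq {
      uint64_t F;
      Freq &operator+=(Freq RHS) { // saturate instead of wrapping on overflow
        uint64_t Before = F;
        F += RHS.F;
        if (F < Before)
          F = UINT64_MAX;
        return *this;
      }
    };

    int main() {
      Freq Cost = {0};
      Freq BlockFreq = {UINT64_MAX / 2 + 1}; // a very hot block
      unsigned Ins = 3;                      // three spills placed in it
      while (Ins--)                          // the patch's accumulation pattern
        Cost += BlockFreq;
      std::printf("%llu\n", (unsigned long long)Cost.F); // saturated at max
      return 0;
    }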
NumCands = 1; - BestCost = HUGE_VALF; + BestCost = BlockFrequency::getMaxFrequency(); } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = Hysteresis * calcSpillCost(); + BestCost = calcSpillCost(); DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); } @@ -1157,7 +1215,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cand.reset(IntfCache, PhysReg); SpillPlacer->prepare(Cand.LiveBundles); - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; @@ -1193,7 +1251,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); if (Cost < BestCost) { BestCand = NumCands; - BestCost = Hysteresis * Cost; // Prevent rounding effects. + BestCost = Cost; } ++NumCands; } @@ -1511,7 +1569,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); + const float blockFreq = + SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * + (1.0f / BlockFrequency::getEntryFrequency()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1770,6 +1830,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); Indexes = &getAnalysis<SlotIndexes>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); @@ -1777,8 +1838,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + DEBUG(LIS->dump()); + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 15a88e2..81ecca1 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -96,7 +97,6 @@ public: initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -130,8 +130,8 @@ private: const TargetMachine *tm; const TargetRegisterInfo *tri; const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; + const MachineBlockFrequencyInfo *mbfi; OwningPtr<Spiller> spiller; LiveIntervals *lis; @@ -188,7 +188,7 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { } PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo 
*loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); @@ -313,10 +313,10 @@ void PBQPBuilder::addInterferenceCosts( PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -350,7 +350,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, - loopInfo->getLoopDepth(mbb)); + mbfi->getBlockFreq(mbb)); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -435,10 +435,12 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineBlockFrequencyInfo>(); + au.addPreserved<MachineBlockFrequencyInfo>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<VirtRegMap>(); au.addPreserved<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(au); @@ -546,7 +548,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + mbfi = &getAnalysis<MachineBlockFrequencyInfo>(); vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -584,7 +586,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); OwningPtr<PBQPRAProblem> problem( - builder->build(mf, lis, loopInfo, vregsToAlloc)); + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 87382d8..cacd7de 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -40,6 +40,9 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (MF->getTarget().getRegisterInfo() != TRI) { TRI = MF->getTarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); Update = true; } @@ -144,3 +147,32 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.Tag = Tag; } +/// This is not accurate because two overlapping register sets may have some +/// nonoverlapping reserved registers. However, computing the allocation order +/// for all register classes would be too expensive. 
+unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { + const TargetRegisterClass *RC = 0; + unsigned NumRCUnits = 0; + for (TargetRegisterInfo::regclass_iterator + RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { + const int *PSetID = TRI->getRegClassPressureSets(*RI); + for (; *PSetID != -1; ++PSetID) { + if ((unsigned)*PSetID == Idx) + break; + } + if (*PSetID == -1) + continue; + + // Found a register class that counts against this pressure set. + // For efficiency, only compute the set order for the largest set. + unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit; + if (!RC || NUnits > NumRCUnits) { + RC = *RI; + NumRCUnits = NUnits; + } + } + compute(RC); + unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); + return TRI->getRegPressureSetLimit(Idx) + - TRI->getRegClassWeight(RC).RegWeight * NReserved; +} diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 82043c2..f99f1a3 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -166,7 +166,8 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerializing the definition. - bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, + bool &IsDefCopy); /// canJoinPhys - Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); @@ -731,7 +732,9 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerializing the definition. bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, - MachineInstr *CopyMI) { + MachineInstr *CopyMI, + bool &IsDefCopy) { + IsDefCopy = false; unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); @@ -740,16 +743,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + VNInfo *ValNo = LiveRangeQuery(SrcInt, CopyIdx).valueIn(); + assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); if (!DefMI) return false; - assert(DefMI && "Defining instruction disappeared"); + if (DefMI->isCopyLike()) { + IsDefCopy = true; + return false; + } if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) @@ -843,6 +848,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, // been asked for. If so it must implicitly define the whole thing.
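
computePSetLimit above estimates a pressure-set limit by finding the heaviest register class that counts against the set and deducting the weight of that class's reserved registers. A standalone sketch of the same calculation over plain arrays; the class table and numbers are invented:

    #include <cstdio>

    struct RegClass {
      const int *PSets; // -1 terminated pressure sets this class counts against
      unsigned WeightLimit, RegWeight, NumReserved;
    };

    static unsigned computePSetLimit(const RegClass *Classes, unsigned NumClasses,
                                     unsigned Idx, unsigned RawLimit) {
      const RegClass *Best = 0;
      for (unsigned c = 0; c != NumClasses; ++c) {
        const int *P = Classes[c].PSets;
        while (*P != -1 && (unsigned)*P != Idx)
          ++P;
        if (*P == -1)
          continue; // this class does not touch the set
        if (!Best || Classes[c].WeightLimit > Best->WeightLimit)
          Best = &Classes[c]; // keep the largest contributing class
      }
      if (!Best)
        return RawLimit;
      // Reserved registers can never be allocated, so discount their weight.
      return RawLimit - Best->RegWeight * Best->NumReserved;
    }

    int main() {
      static const int GPRSets[] = {0, -1};
      static const int FPRSets[] = {1, -1};
      RegClass Classes[2] = { {GPRSets, 32, 1, 2}, {FPRSets, 16, 1, 0} };
      std::printf("%u\n", computePSetLimit(Classes, 2, 0, 32)); // 32 - 1*2 = 30
      return 0;
    }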
assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); + NewMI->getOperand(0).setIsDead(true); NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, true /*IsDef*/, true /*IsImp*/, @@ -1063,8 +1069,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; + if (IsDefCopy) + Again = true; // May be possible to coalesce later. return false; } } else { @@ -1096,7 +1105,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; // If we can eliminate the copy without merging the live ranges, do so now. @@ -2060,6 +2070,9 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (!Copy->isCopy()) return false; + if (Copy->getOperand(1).isUndef()) + return false; + unsigned SrcReg = Copy->getOperand(1).getReg(); unsigned DstReg = Copy->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(SrcReg) @@ -2105,8 +2118,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. - for (MachineBasicBlock::reverse_iterator - MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) { + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) { if (!MII->isCopyLike()) continue; if (isLocalCopy(&(*MII), LIS)) diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 97f22e1..b7ab138 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -76,17 +76,22 @@ void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -static void dumpSetPressure(const std::vector<unsigned> &SetPressure, - const TargetRegisterInfo *TRI) { +void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, + const TargetRegisterInfo *TRI) { + bool Empty = true; for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { - if (SetPressure[i] != 0) + if (SetPressure[i] != 0) { dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + Empty = false; + } } + if (Empty) + dbgs() << "\n"; } void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; - dumpSetPressure(MaxSetPressure, TRI); + dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -98,8 +103,10 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { } void RegPressureTracker::dump() const { - dbgs() << "Curr Pressure: "; - dumpSetPressure(CurrSetPressure, TRI); + if (!isTopClosed() || !isBottomClosed()) { + dbgs() << "Curr Pressure: "; + dumpRegSetPressure(CurrSetPressure, TRI); + } P.dump(TRI); } #endif @@ -200,13 +207,15 @@ void RegPressureTracker::init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, - 
MachineBasicBlock::const_iterator pos) + MachineBasicBlock::const_iterator pos, + bool ShouldTrackUntiedDefs) { MF = mf; TRI = MF->getTarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; + TrackUntiedDefs = ShouldTrackUntiedDefs; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -215,6 +224,7 @@ void RegPressureTracker::init(const MachineFunction *mf, CurrPos = pos; CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + LiveThruPressure.clear(); if (RequireIntervals) static_cast<IntervalPressure&>(P).reset(); @@ -226,6 +236,9 @@ void RegPressureTracker::init(const MachineFunction *mf, LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); LiveRegs.VirtRegs.clear(); LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + UntiedDefs.clear(); + if (TrackUntiedDefs) + UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } /// Does this pressure result have a valid top position and live ins. @@ -304,6 +317,25 @@ void RegPressureTracker::closeRegion() { // If both top and bottom are closed, do nothing. } +/// The register tracker is unaware of global liveness so ignores normal +/// live-thru ranges. However, two-address or coalesced chains can also lead +/// to live ranges with no holes. Count these to inform heuristics that we +/// can never drop below this pressure. +void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { + LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); + assert(isBottomClosed() && "need bottom-up tracking to initialize."); + for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { + unsigned Reg = P.LiveOutRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) + && !RPTracker.hasUntiedDef(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + increaseSetPressure(LiveThruPressure, LiveThruPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + } +} + /// \brief Convenient wrapper for checking membership in RegisterOperands. static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) { return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); } @@ -459,11 +491,20 @@ bool RegPressureTracker::recede() { LiveRegs.insert(Reg); } } + if (TrackUntiedDefs) { + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) + UntiedDefs.insert(Reg); + } + } return true; } /// Advance across the current instruction. bool RegPressureTracker::advance() { + assert(!TrackUntiedDefs && "unsupported mode"); + // Check for the bottom of the analyzable region. if (CurrPos == MBB->end()) { closeRegion(); @@ -533,7 +574,8 @@ bool RegPressureTracker::advance() { static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, ArrayRef<unsigned> NewPressureVec, RegPressureDelta &Delta, - const TargetRegisterInfo *TRI) { + const RegisterClassInfo *RCI, + ArrayRef<unsigned> LiveThruPressureVec) { int ExcessUnits = 0; unsigned PSetID = ~0U; for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { @@ -543,7 +585,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, if (!PDiff) // No change in this set in the common case. continue; // Only consider change beyond the limit.
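
initLiveThru above gives the pressure heuristics a floor: any virtual register that is live out of the scheduling region without an untied def inside it is counted as live-through, and pressure can never fall below the combined weight of those registers. A standalone sketch of the accumulation over an invented register table:

    #include <cstdio>
    #include <vector>

    struct VReg { unsigned Weight; bool LiveOut; bool HasUntiedDef; };

    // Sum the weights of registers that merely flow through the region.
    static unsigned liveThruPressure(const std::vector<VReg> &Regs) {
      unsigned P = 0;
      for (unsigned i = 0; i < Regs.size(); ++i)
        if (Regs[i].LiveOut && !Regs[i].HasUntiedDef)
          P += Regs[i].Weight;
      return P;
    }

    int main() {
      std::vector<VReg> Regs;
      VReg A = {1, true, false};  Regs.push_back(A); // live-through: counts
      VReg B = {1, true, true};   Regs.push_back(B); // defined here: skipped
      VReg C = {2, false, false}; Regs.push_back(C); // dies here: skipped
      std::printf("live-thru pressure = %u\n", liveThruPressure(Regs)); // 1
      return 0;
    }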
- unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RCI->getRegPressureSetLimit(i); + if (!LiveThruPressureVec.empty()) + Limit += LiveThruPressureVec[i]; + if (Limit > POld) { if (Limit > PNew) PDiff = 0; // Under the limit @@ -553,9 +598,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. - if (std::abs(PDiff) > std::abs(ExcessUnits)) { + if (PDiff) { ExcessUnits = PDiff; PSetID = i; + break; } } Delta.Excess.PSetID = PSetID; @@ -583,23 +629,28 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, if (PNew == POld) // No change in this set in the common case. continue; - while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) - ++CritIdx; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) + ++CritIdx; - if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { - int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; - if (PDiff > Delta.CriticalMax.UnitIncrease) { - Delta.CriticalMax.PSetID = i; - Delta.CriticalMax.UnitIncrease = PDiff; + if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; + if (PDiff > 0) { + Delta.CriticalMax.PSetID = i; + Delta.CriticalMax.UnitIncrease = PDiff; + } } } - - // Find the greatest increase above MaxPressureLimit. + // Find the first increase above MaxPressureLimit. // (Ignores negative MDiff). - int MDiff = (int)PNew - (int)MaxPressureLimit[i]; - if (MDiff > Delta.CurrentMax.UnitIncrease) { - Delta.CurrentMax.PSetID = i; - Delta.CurrentMax.UnitIncrease = PNew; + if (!Delta.CurrentMax.isValid()) { + int MDiff = (int)PNew - (int)MaxPressureLimit[i]; + if (MDiff > 0) { + Delta.CurrentMax.PSetID = i; + Delta.CurrentMax.UnitIncrease = MDiff; + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; + } } } } @@ -659,7 +710,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpUpwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); assert(Delta.CriticalMax.UnitIncrease >= 0 && @@ -749,7 +801,8 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpDownwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); assert(Delta.CriticalMax.UnitIncrease >= 0 && diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index b8ef6a4..75ebdaa 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -44,8 +44,8 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const { } void RegScavenger::initRegState() { - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { I->Reg = 0; I->Restore = NULL; } @@ -181,8 +181,8 @@ void RegScavenger::forward() { MachineInstr *MI = MBBI; - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = 
Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { if (I->Restore != MI) continue; @@ -368,7 +368,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg() && MO.getReg() != 0 && + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 07e5b47..75e3790 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -64,8 +64,8 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this depenence, don't add a redundant one. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) { + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't // add them if another kind of edge already exists. if (!Required && I->getSUnit() == D.getSUnit()) @@ -77,7 +77,7 @@ bool SUnit::addPred(const SDep &D, bool Required) { // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); - for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), + for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); @@ -132,8 +132,8 @@ bool SUnit::addPred(const SDep &D, bool Required) { /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) if (*I == D) { // Find the corresponding successor in N. SDep P = D; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index e4da6a4..892903c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -98,7 +98,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; if (!Visited.insert(V)) @@ -116,12 +116,15 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } +typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> +UnderlyingObjectsVector; + /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. 
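[Editor's note] The SmallVector to SmallVectorImpl changes in the RegisterScavenging and ScheduleDAG hunks above are all one idiom: SmallVectorImpl<T> is the size-erased base class of every SmallVector<T, N>, so iterator types and reference parameters written against it no longer bake a particular inline capacity into the code. A minimal sketch, assuming the LLVM ADT headers are on the include path:

#include "llvm/ADT/SmallVector.h"
#include <cstdio>

// Accepts a SmallVector<int, N> for any N; a SmallVector<int, 4>&
// parameter would bind only to that one instantiation.
static int sum(const llvm::SmallVectorImpl<int> &V) {
  int S = 0;
  for (llvm::SmallVectorImpl<int>::const_iterator I = V.begin(), E = V.end();
       I != E; ++I)
    S += *I;
  return S;
}

int main() {
  llvm::SmallVector<int, 4> A;
  llvm::SmallVector<int, 16> B;
  for (int i = 1; i <= 3; ++i) {
    A.push_back(i);
    B.push_back(10 * i);
  }
  std::printf("%d %d\n", sum(A), sum(B)); // 6 60
}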
static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - SmallVectorImpl<std::pair<const Value *, bool> > &Objects) { + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -134,8 +137,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { bool MayAlias = true; V = *I; @@ -155,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; } - Objects.push_back(std::make_pair(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); } } @@ -267,13 +270,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); - Dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/false)); + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); @@ -310,10 +310,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { SDep Dep(SU, Kind, /*Reg=*/*Alias); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } } @@ -389,10 +387,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { SDep Dep(SU, SDep::Output, Reg); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } DefI->SU = SU; @@ -427,10 +423,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. 
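[Editor's note] The UnderlyingObjectsVector typedef above swaps std::pair<const Value *, bool> for PointerIntPair<const Value *, 1, bool>, which hides the flag in the low alignment bit of the pointer so each entry stays one word; that is why the later hunks read K->getPointer() and K->getInt() instead of K->first and K->second. A toy version of the packing trick (the real llvm::PointerIntPair is trait-driven and supports more bits):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack one bool into the low bit of a sufficiently aligned pointer.
template <typename T> class ToyPointerBoolPair {
  uintptr_t Bits;
public:
  ToyPointerBoolPair(T *P, bool B) {
    uintptr_t V = reinterpret_cast<uintptr_t>(P);
    assert((V & 1) == 0 && "pointer must be at least 2-byte aligned");
    Bits = V | static_cast<uintptr_t>(B);
  }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
  bool getInt() const { return Bits & 1; }
};

int main() {
  int X = 42;
  ToyPointerBoolPair<int> P(&X, true);
  // Same footprint as a bare pointer, unlike std::pair<int *, bool>.
  std::printf("%d %d %zu\n", *P.getPointer(), (int)P.getInt(), sizeof(P));
}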
int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -472,8 +465,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { V = *I; if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { @@ -855,7 +848,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -864,10 +857,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } bool MayAlias = false; - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator - K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->first; - bool ThisMayAlias = K->second; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + const Value *V = K->getPointer(); + bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -929,7 +922,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -945,10 +938,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = false; } - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->first; - bool ThisMayAlias = J->second; + const Value *V = J->getPointer(); + bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 046dd41..cb88941 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -154,7 +154,7 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); @@ -279,7 +279,7 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases); + SmallVectorImpl<SDValue> &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. @@ -326,7 +326,10 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? 
TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -1251,7 +1254,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1610,13 +1613,19 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) return DAG.getConstant(0, VT); - } if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + EVT ElemTy = VT.getVectorElementType(); + if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) == + TargetLowering::TypePromoteInteger) + ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy); + assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) && + "Type for zero vector elements is not legal"); + SDValue El = DAG.getConstant(0, ElemTy); std::vector<SDValue> Ops(VT.getVectorNumElements(), El); return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Ops[0], Ops.size()); @@ -1646,7 +1655,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1762,43 +1771,73 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are +/// all the same constant or undefined. 
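[Editor's note] The new getShiftAmountTy body above encodes a three-way rule: vector shifts take per-lane amounts of the shifted vector's own type, legalized scalar shifts use the target's scalar shift-amount type, and before type legalization a pointer-wide integer is used because amounts can be huge. A toy model of the selection, with made-up type and target parameters rather than EVT/TargetLowering:

#include <cstdio>
#include <string>

struct ToyType {
  bool IsVector;
  unsigned Bits;
};

// Illustrative only: ScalarShiftBits and PtrBits stand in for the
// target's getScalarShiftAmountTy and getPointerTy results.
static std::string shiftAmountTy(ToyType LHS, bool LegalTypes,
                                 unsigned ScalarShiftBits, unsigned PtrBits) {
  if (LHS.IsVector)
    return "v" + std::to_string(LHS.Bits); // lane-wise amounts, same type
  return "i" + std::to_string(LegalTypes ? ScalarShiftBits : PtrBits);
}

int main() {
  ToyType V128 = {true, 128}, I32 = {false, 32};
  std::printf("%s %s %s\n",
              shiftAmountTy(V128, true, 8, 64).c_str(),  // v128
              shiftAmountTy(I32, true, 8, 64).c_str(),   // i8
              shiftAmountTy(I32, false, 8, 64).c_str()); // i64
}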
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + bool N0IsConst = false; + bool N1IsConst = false; + APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); + N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); } - // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (mul c1, c2) -> c1*c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + if (N0IsConst && N1IsConst) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + // canonicalize constant to RHS - if (N0C && !N1C) + if (N0IsConst && !N1IsConst) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (N1IsConst && ConstValue1 == 0) return N1; + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) + if (N1IsConst && ConstValue1.isAllOnesValue()) return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) + if (N1IsConst && ConstValue1.isPowerOf2()) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2()) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
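[Editor's note] visitMUL above is reworked so the scalar constant folds also fire when an operand is a splat BUILD_VECTOR, with isConstantSplatVector feeding the same APInt-based paths. Two of those folds in standalone form, splat detection over plain integers plus the power-of-two strength reduction (mul x, 1<<c becomes shl x, c, and mul x, -(1<<c) becomes a negated shift); this is a sketch of the arithmetic, not the DAG plumbing:

#include <cstdint>
#include <cstdio>
#include <vector>

static bool isConstantSplat(const std::vector<int64_t> &Lanes, int64_t &Splat) {
  if (Lanes.empty())
    return false;
  Splat = Lanes[0];
  for (int64_t L : Lanes)
    if (L != Splat)
      return false;
  return true;
}

static unsigned exactLog2(uint64_t U) { // precondition: U is a power of two
  unsigned L = 0;
  while ((U >> L) != 1)
    ++L;
  return L;
}

static int64_t mulByConst(int64_t X, int64_t C) {
  uint64_t U = (uint64_t)C, NU = (uint64_t)-C;
  if (U && (U & (U - 1)) == 0)
    return X << exactLog2(U);     // mul x, (1 << c) -> shl x, c
  if (NU && (NU & (NU - 1)) == 0)
    return -(X << exactLog2(NU)); // mul x, -(1 << c) -> neg (shl x, c)
  return X * C;
}

int main() {
  std::vector<int64_t> V = {8, 8, 8, 8};
  int64_t C;
  if (isConstantSplat(V, C)) // the splat makes the fold legal per lane
    std::printf("5 * %lld = %lld\n", (long long)C, (long long)mulByConst(5, C));
}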
return DAG.getNode(ISD::SUB, SDLoc(N), VT, @@ -1807,9 +1846,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(1))) { + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); @@ -1822,7 +1864,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). - if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1840,8 +1884,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), @@ -2502,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // @@ -2716,7 +2761,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -2905,7 +2950,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// isBSwapHWordElement - Return true if the specified node is an element /// that makes up a 32-bit packed halfword byteswap. i.e. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { +static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3304,25 +3349,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { LHSShiftAmt == RHSShiftAmt.getOperand(1)) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? 
LHSShiftAmt : RHSShiftAmt).getNode(); - } } } // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + RHSShiftAmt == LHSShiftAmt.getOperand(1)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } // Look for sign/zext/any-extended or truncate cases: if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || @@ -3342,13 +3383,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> // (rotr x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } } else if (LExtOp0.getOpcode() == ISD::SUB && RExtOp0 == LExtOp0.getOperand(1)) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> @@ -3356,13 +3395,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> // (rotl x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } } } @@ -3489,7 +3526,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -3915,8 +3952,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { DAG.getConstant(~0ULL >> ShAmt, VT)); } - - // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
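[Editor's note] The MatchRotate cleanups above all serve one fold: (or (shl x, y), (srl x, w-y)) and its sign/zero/any-extended-amount variants become a single rotate, guarded by the subtraction constant equaling the full bit width w. That guard matters because the shift pair misbehaves at y == 0 (a shift by w is undefined), while a true rotate is total. A standalone check of the identity:

#include <cstdint>
#include <cstdio>

// Rotate with a masked amount, total for every y, matching the hardware
// semantics the combiner is targeting.
static uint32_t rotl32(uint32_t X, unsigned Y) {
  Y &= 31;
  return Y ? (X << Y) | (X >> (32 - Y)) : X;
}

int main() {
  uint32_t X = 0x12345678;
  for (unsigned Y = 1; Y < 32; ++Y)
    if (((X << Y) | (X >> (32 - Y))) != rotl32(X, Y)) {
      std::puts("mismatch");
      return 1;
    }
  std::puts("shl|srl pair equals rotl for 0 < y < 32");
}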
EVT SmallVT = N0.getOperand(0).getValueType(); @@ -3929,7 +3965,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift); + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), + DAG.getConstant(Mask, VT)); } } @@ -4233,20 +4272,22 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + if (SCC.getNode()) { + AddToWorkList(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } + if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } - // Fold to a simpler select_cc - if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), - SCC.getOperand(0), SCC.getOperand(1), N2, N3, - SCC.getOperand(2)); + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) @@ -4268,7 +4309,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, - SmallVector<SDNode*, 4> &ExtendNodes, + SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); @@ -4326,7 +4367,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } -void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. @@ -4508,7 +4549,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. 
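[Editor's note] The visitSRL change above (the "(srl (anyextend x), c)" fold gaining an AND) reads as a correctness fix: any_extend leaves the high bits undefined, and narrowing the shift then re-extending with another any_extend would leave garbage in bits the original srl guaranteed to be zero. The new mask, all-ones shifted right by the amount, restores that guarantee. A standalone demonstration with explicit garbage standing in for the undefined bits:

#include <cstdint>
#include <cstdio>

int main() {
  uint16_t X = 0xABCD;
  unsigned C = 4;

  // (srl (any_extend x), c): the extend's top 16 bits are undefined
  // (modeled as 0xDEAD), but the shift still forces the top c bits to 0.
  uint32_t Original = (0xDEAD0000u | X) >> C;

  // Narrow shift re-extended with another any_extend: fresh garbage
  // (0xBEEF) now sits where the original guaranteed zeros.
  uint32_t NoMask = 0xBEEF0000u | (uint32_t)(uint16_t)(X >> C);

  uint32_t Mask = 0xFFFFFFFFu >> C; // the combiner's lshr'd all-ones mask
  std::printf("top bits: original=%x unmasked=%x masked=%x\n",
              Original >> 28, NoMask >> 28, (NoMask & Mask) >> 28); // 0 b 0
}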
if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == + TLI.getBooleanContents(true) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is @@ -4547,14 +4588,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) - return DAG.getNode(ISD::SELECT, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + if (!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { + return DAG.getSelect(SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } } // fold (sext x) -> (zext x) if the sign bit is known zero. @@ -5039,9 +5082,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5169,12 +5211,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5251,10 +5300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); - } // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -5400,7 +5448,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), @@ -5632,8 +5680,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, @@ -5892,22 +5940,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. This transform is not safe in general because @@ -5919,7 +5965,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), @@ -5928,7 +5974,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), @@ -5937,7 +5983,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { @@ -5948,7 +5994,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -5964,7 +6010,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 
SDValue(CFP10, 0), @@ -5973,7 +6019,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), @@ -5983,79 +6029,74 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), NewCFP); + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { + (N0.getOperand(0) == N1)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { + N1.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { + N0.getOperand(0) == N1.getOperand(0)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
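[Editor's note] The FADD-to-FMA combines above fire only when the target says a fused multiply-add is faster (the renamed isFMAFasterThanFMulAndFAdd hook) and FMA is legal or we are still pre-legalization. Fusion is gated behind fast/unsafe FP because it is not bit-identical: the fused form rounds once where mul-then-add rounds twice. A standalone illustration via the C library's fused primitive:

#include <cmath>
#include <cstdio>

int main() {
  // Chosen so that x*y = 1 - 2^-60 exactly; rounding the product to
  // double loses the 2^-60 term, the fused form keeps it.
  double x = 1.0 + 0x1p-30, y = 1.0 - 0x1p-30, z = -1.0;
  double separate = x * y + z;      // two roundings: prints 0x0p+0
  double fused = std::fma(x, y, z); // one rounding: prints -0x1p-60
  std::printf("separate=%a fused=%a\n", separate, fused);
}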
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6110,8 +6151,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6119,27 +6161,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6195,7 +6235,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
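[Editor's note] The FSUB combines above are the same fusion with a negation folded in, and the identities are exact in real arithmetic: a*b - c = fma(a, b, -c), and x - y*z = fma(-y, z, x) after commuting; floating-point rounding differences are again what the fast/unsafe gates cover. A quick standalone check with values that are exact in double:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.5, y = 2.5, z = 0.25;
  // fsub (fmul x, y), z -> fma x, y, (fneg z)
  std::printf("%g %g\n", x * y - z, std::fma(x, y, -z));
  // fsub x, (fmul y, z) -> fma (fneg y), z, x (commutes the FSUB)
  std::printf("%g %g\n", x - y * z, std::fma(-y, z, x));
}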
@@ -6276,21 +6316,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6670,12 +6706,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { + if (CFP1) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6740,7 +6775,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { @@ -7165,7 +7200,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expresion that needs to be re-written. // @@ -7287,7 +7322,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -8577,7 +8612,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector<SDValue, 8> Ops; - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.getOpcode() == ISD::UNDEF) { @@ -8650,7 +8687,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } - EVT IndexTy = N->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, InVec, DAG.getConstant(OrigElt, IndexTy)); } @@ -8789,7 +8826,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else { Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), Align); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) @@ -9167,7 +9204,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. 
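[Editor's note] The visitFMA folds at the top of this hunk, (fma x, c, x) to (fmul x, c+1) and (fma x, c, (fneg x)) to (fmul x, c-1), are plain distributivity, but they sit behind UnsafeFPMath because the two sides can round differently. A standalone spot-check with values where both sides are exact:

#include <cmath>
#include <cstdio>

int main() {
  double x = 3.0, c = 4.0;
  std::printf("%g %g\n", std::fma(x, c, x), x * (c + 1.0));  // 15 15
  std::printf("%g %g\n", std::fma(x, c, -x), x * (c - 1.0)); // 9 9
}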
if (!CS) return SDValue(); - + // Check that we are reading from the identity index. if (CS->getZExtValue() != IdentityIndex) return SDValue(); @@ -9175,7 +9212,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SingleSource.getNode()) return SingleSource; - + return SDValue(); } @@ -9605,8 +9642,8 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SDLoc(SCC), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9675,10 +9712,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, SDLoc(TheSelect), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9812,8 +9849,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorkList(Cond.getNode()); - SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), - Cond, One, Zero); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); AddToWorkList(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, CstOffset); @@ -10205,7 +10242,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases) { + SmallVectorImpl<SDValue> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. SmallPtrSet<SDNode *, 16> Visited; // Visited node set. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index eb80c64..b4ac948f 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -41,6 +41,7 @@ #define DEBUG_TYPE "isel" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" @@ -75,15 +76,12 @@ STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); void FastISel::startNewBlock() { LocalValueMap.clear(); + // Instructions are appended to FuncInfo.MBB. If the basic block already + // contains labels or copies, use the last instruction as the last local + // value. EmitStartPt = 0; - - // Advance the emit start point past any EH_LABEL instructions. - MachineBasicBlock::iterator - I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); - while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { - EmitStartPt = I; - ++I; - } + if (!FuncInfo.MBB->empty()) + EmitStartPt = &FuncInfo.MBB->back(); LastLocalValue = EmitStartPt; } @@ -92,18 +90,16 @@ bool FastISel::LowerArguments() { // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. 
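[Editor's note] SimplifySelectOps, whose address computation above now goes through DAG.getSelect, turns a select of two loads into one load of a selected address. The shape of the rewrite in plain C++; the surrounding LLVM code supplies the safety conditions (matching chains, neither load a predecessor of the other), and the C++ version below is safe only because the original form dereferences both pointers unconditionally anyway:

#include <cstdio>

// Before: two loads, select the values.
static int selectOfLoads(bool Cond, const int *P, const int *Q) {
  int A = *P, B = *Q;
  return Cond ? A : B;
}

// After: select the address, load once.
static int loadOfSelect(bool Cond, const int *P, const int *Q) {
  const int *Addr = Cond ? P : Q;
  return *Addr;
}

int main() {
  int X = 1, Y = 2;
  std::printf("%d %d\n", selectOfLoads(false, &X, &Y),
              loadOfSelect(false, &X, &Y)); // 2 2
}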
return false; - + if (!FastLowerArguments()) return false; - // Enter non-dead arguments into ValueMap for uses in non-entry BBs. + // Enter arguments into ValueMap for uses in non-entry BBs. for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - if (!I->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); - assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; - } + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; } return true; } @@ -601,7 +597,10 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); - if (!DIVariable(DI->getVariable()).Verify() || + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -613,16 +612,16 @@ bool FastISel::SelectCall(const User *I) { return true; } - unsigned Reg = 0; unsigned Offset = 0; - if (const Argument *Arg = dyn_cast<Argument>(Address)) { + Optional<MachineOperand> Op; + if (const Argument *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); - } - if (!Reg) - Reg = lookUpRegForValue(Address); + if (Offset) + Op = MachineOperand::CreateFI(Offset); + if (!Op) + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. E.g., @@ -635,16 +634,29 @@ bool FastISel::SelectCall(const User *I) { // If we assign 'a' a vreg and fast isel later on has to use the selection // DAG isel, it will want to copy the value to the vreg. However, there are // no uses, which goes counter to what selection DAG isel expects. - if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) - Reg = FuncInfo.InitializeRegForValue(Address); - - if (Reg) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset) - .addMetadata(DI->getVariable()); + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) + if (Op->isReg()) { + // Set the indirect flag if the type and the DIVariable's + // indirect field are in disagreement: Indirectly-addressed + // variables that are nonpointer types should be marked as + // indirect, and VLAs should be marked as indirect eventhough + // they are a pointer type. 
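[Editor's note] The dbg_declare rewrite above replaces the Reg/Offset juggling with an Optional<MachineOperand> filled by a preference cascade: a recorded frame index first, then an already-assigned vreg, then (for the VLA corner case) a freshly initialized one; if nothing sticks, the debug info is dropped. A sketch of the cascade using std::optional as a stand-in for llvm::Optional, with a toy operand type instead of MachineOperand:

#include <cstdio>
#include <optional>

struct ToyOperand {
  bool IsReg;     // register operand vs. frame-index operand
  unsigned Value;
};

static std::optional<ToyOperand> pickLocation(unsigned FrameIndex,
                                              unsigned KnownReg) {
  std::optional<ToyOperand> Op;
  if (FrameIndex)      // argument already lowered to a stack slot
    Op = ToyOperand{false, FrameIndex};
  if (!Op && KnownReg) // otherwise an existing virtual register
    Op = ToyOperand{true, KnownReg};
  return Op;           // empty means: drop the DBG_VALUE
}

int main() {
  if (std::optional<ToyOperand> Op = pickLocation(0, 7))
    std::printf("%s %u\n", Op->IsReg ? "reg" : "fi", Op->Value); // reg 7
  if (!pickLocation(0, 0))
    std::puts("no location, debug info dropped");
}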
+ bool IsIndirect = DI->getAddress()->getType()->isPointerTy() + ^ DIVar.isIndirect(); + Op->setIsDebug(true); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Op->getReg(), Offset, DI->getVariable()); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0) + .addMetadata(DI->getVariable()); else // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -676,9 +688,9 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + bool IsIndirect = DI->getOffset() != 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 86e188a..4309dc1 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -55,15 +55,12 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -FunctionLoweringInfo::FunctionLoweringInfo(const TargetMachine &TM) - : TM(TM), TLI(0) { -} - void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + const TargetLowering *TLI = TM.getTargetLowering(); + Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); - TLI = TM.getTargetLowering(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -115,8 +112,11 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { MachineModuleInfo &MMI = MF->getMMI(); + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); if (MMI.hasDebugInfo() && - DIVariable(DI->getVariable()).Verify() && + DIVar && !DI->getDebugLoc().isUnknown()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack @@ -209,7 +209,8 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT)); + return RegInfo-> + createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -220,6 +221,8 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. 
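[Editor's note] The IsIndirect computation at the top of this hunk is the comment's rule reduced to one XOR: flag the DBG_VALUE as indirect exactly when the address's pointer-ness and the DIVariable's indirect bit disagree. A table of the four cases as a reading aid (the row interpretations follow the hunk's comment, not any spec):

#include <cstdio>

int main() {
  // IsIndirect = isPointerTy ^ metadataIndirect: only the two
  // "disagreement" rows get the indirect flag.
  for (int Ptr = 0; Ptr <= 1; ++Ptr)
    for (int Meta = 0; Meta <= 1; ++Meta)
      std::printf("isPointerTy=%d metaIndirect=%d -> IsIndirect=%d\n",
                  Ptr, Meta, Ptr ^ Meta);
}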
/// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); @@ -267,6 +270,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3b1abd7..e107276 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -639,8 +639,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - unsigned FrameIx = SD->getFrameIx(); - return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -678,7 +678,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - MIB.addImm(Offset).addMetadata(MDPtr); + if (Offset != 0) // Indirect addressing. + MIB.addImm(Offset); + else + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(MDPtr); + return &*MIB; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index a9c2203..920dda8 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -1,4 +1,4 @@ -//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 224fa5f..bd844e5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -610,7 +610,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, + MachinePointerInfo::getFixedStack(SPFI), false, false, false, 0); } @@ -1493,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, false, 0); } @@ -1553,9 +1553,9 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); // Select between the nabs and abs value based on the sign bit of // the input. 
- return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1614,12 +1614,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETO: + case ISD::SETO: assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) == TargetLowering::Legal && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETUO: + case ISD::SETUO: assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) == TargetLowering::Legal && "If SETUO is expanded, SETUNE must be legal!"); @@ -1629,12 +1629,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: - case ISD::SETUNE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. if (!OpVT.isInteger()) { @@ -1663,7 +1663,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, CC = SDValue(); return; } - + SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, @@ -2136,7 +2136,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; - + SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { @@ -2164,25 +2164,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - + // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); - + EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; @@ -2190,7 +2190,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Also pass the return address of the cos. SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; @@ -2198,10 +2198,10 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), @@ -2335,7 +2335,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, //pseudo-op, or, even better, for whole-function isel. 
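[Editor's note] useSinCos and ExpandSinCosLibCall above spot a sin and a cos of the same operand and merge them into one sincos libcall, passing two stack temporaries as out-parameters (the three ArgListEntry pushes in the hunk). The calling convention being targeted, sketched with a portable stand-in since sincos itself is a GNU/BSD libm extension:

#include <cmath>
#include <cstdio>

// Same signature shape as the sincos libcall: one input, two
// out-pointers, void return. A real sincos shares the (expensive)
// argument reduction between the two results.
static void toy_sincos(double X, double *Sin, double *Cos) {
  *Sin = std::sin(X);
  *Cos = std::cos(X);
}

int main() {
  double S, C; // the legalizer's two stack temporaries
  toy_sincos(1.0, &S, &C);
  std::printf("sin=%f cos=%f\n", S, C);
}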
SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); - return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. @@ -2348,11 +2348,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); - SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); - SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2375,7 +2375,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); - SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated @@ -2928,7 +2928,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, NVT)); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } @@ -2940,7 +2940,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; @@ -3031,7 +3031,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. 
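[Editor's note] The INT_TO_FP expansion above (now phrased with DAG.getSelect) picks between a fast path, a plain signed convert valid when the sign bit is clear, and a slow path that halves the value while preserving the low bit for rounding, converts, and doubles back. The same structure as scalar C++; the u64-to-i64 conversion is implementation-defined in older C++ but two's complement in practice:

#include <cstdint>
#include <cstdio>

static float u64ToF32(uint64_t V) {
  float Fast = (float)(int64_t)V;           // exact route when V < 2^63
  uint64_t Halved = (V >> 1) | (V & 1);     // keep the low bit for rounding
  float Slow = (float)(int64_t)Halved * 2.0f;
  return ((int64_t)V < 0) ? Slow : Fast;    // the select on the sign bit
}

int main() {
  uint64_t V = 0x8000000000000100ull;
  std::printf("%f %f\n", u64ToF32(V), (float)V); // both print 2^63
}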
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits()/NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); @@ -3071,11 +3071,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getIntPtrConstant(Idx))); + DAG.getConstant(Idx, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getIntPtrConstant(Idx - NumElems))); + DAG.getConstant(Idx - NumElems, + TLI.getVectorIdxTy()))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); @@ -3140,7 +3141,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); Results.push_back(Tmp1); break; } @@ -3269,22 +3270,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } - case ISD::EHSELECTION: { - unsigned Reg = TLI.getExceptionSelectorRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } - case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } case ISD::FSUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && @@ -3704,10 +3689,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + Node->getOperand(0), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Node->getOperand(1), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -3762,8 +3749,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) @@ -3858,7 +3845,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. 
- Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f6df211..cea0b02 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -503,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), + L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -526,8 +526,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { @@ -855,9 +855,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, GetExpandedFloat(N->getOperand(0), Lo, Tmp); Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); // Lo = Hi==fabs(Hi) ? Lo : -Lo; - Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), - DAG.getCondCode(ISD::SETEQ)); + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, @@ -1216,8 +1216,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), MVT::ppcf128)); - Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), - Lo, Hi, DAG.getCondCode(ISD::SETLT)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1370,17 +1370,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. 
- return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - DAG.getCondCode(ISD::SETGE)); + return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); } RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index c281553..ff8f1f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -483,8 +483,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { @@ -966,7 +966,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. - SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy()); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -1138,7 +1139,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); SplitInteger(Tmp.first, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -1478,8 +1480,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoL = DAG.getConstant(0, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1494,8 +1496,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: // Short: ShAmt < NVTBits @@ -1511,8 +1513,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. 
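The getSelectCC change above also drops the separate DAG.getCondCode(ISD::SETGE) operand: the helper takes the ISD::CondCode directly. The expansion it reformats is the classic unsigned-from-signed conversion dance spelled out in the source comment: a value at or above 2^31 is shifted down into signed range, converted, and the sign bit added back. A scalar model of that trick in plain C++, assuming truncating float-to-int conversion:

#include <cstdint>
#include <cstdio>

// Model of the FP_TO_UINT expansion above:
//   X >= 2^31 ? (uint32_t)(int32_t)(X - 2^31) + 0x80000000 : (uint32_t)(int32_t)X
uint32_t fpToUint32(double X) {
  const double Two31 = 2147483648.0;
  if (X >= Two31)
    return (uint32_t)(int32_t)(X - Two31) + 0x80000000u;
  return (uint32_t)(int32_t)X;
}

int main() {
  printf("%u\n", fpToUint32(3000000000.0)); // 3000000000
  return 0;
}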
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } } @@ -1558,13 +1560,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); @@ -1572,9 +1574,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1725,9 +1727,9 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, - DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } @@ -1755,9 +1757,9 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, - DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } @@ -2289,14 +2291,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // make sure we aren't using 0. 
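The ExpandIntRes_ADDSUB hunk above shows how a double-width add is expanded when no ADDC/ADDE pair is available: add the low halves, derive the carry from an unsigned compare of the sum against an input, and fold it into the high half (now built with getSelect instead of raw ISD::SELECT nodes). The DAG version compares the sum against both operands and merges the results; mathematically either compare alone detects the wrap, which is what this scalar model uses:

#include <cstdint>
#include <cstdio>

// A 64-bit add out of 32-bit halves, with the carry recovered by comparison.
void add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi,
           uint32_t &Lo, uint32_t &Hi) {
  Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo ? 1u : 0u; // sum wrapped iff it is below an input
  Hi = AHi + BHi + Carry;
}

int main() {
  uint32_t Lo, Hi;
  add64(0xffffffffu, 0u, 1u, 0u, Lo, Hi);
  printf("0x%x:%08x\n", Hi, Lo); // 0x1:00000000
  return 0;
}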
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), RHS, DAG.getConstant(0, VT), ISD::SETEQ); - SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); - Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), - Overflow); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -2304,7 +2306,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); - + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2601,8 +2603,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (!NewLHS.getNode()) NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); NewRHS = SDValue(); } @@ -2830,8 +2832,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Zero = DAG.getIntPtrConstant(0); SDValue Four = DAG.getIntPtrConstant(4); if (TLI.isBigEndian()) std::swap(Zero, Four); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); Alignment = std::min(Alignment, 4u); @@ -2885,7 +2887,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getIntPtrConstant(i)); + BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -2929,7 +2931,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. 
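The XMULO expansion here verifies a same-width multiply by dividing the truncated product back out and comparing against the original operand, with two selects guarding the zero-divisor case (x * 0 never overflows). A scalar model of the check for the unsigned flavor:

#include <cstdint>
#include <cstdio>

// Returns true iff Lhs * Rhs wraps; Mul receives the truncated product.
bool umulOverflows(uint32_t Lhs, uint32_t Rhs, uint32_t &Mul) {
  Mul = Lhs * Rhs;                        // wrapping multiply
  uint32_t NotZero = Rhs == 0 ? 1u : Rhs; // "make sure we aren't using 0"
  bool Overflow = (Mul / NotZero) != Lhs;
  return Rhs == 0 ? false : Overflow;
}

int main() {
  uint32_t Mul;
  printf("%d\n", (int)umulOverflows(0x10000u, 0x10000u, Mul)); // 1: 2^32 wraps
  return 0;
}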
+ if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); Ops.push_back(Op); } @@ -2975,7 +2985,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getIntPtrConstant(j)); + InElemTy, Op, DAG.getConstant(j, + TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -3002,7 +3013,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = N->getOperand(1); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3030,7 +3041,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getIntPtrConstant(i)); + Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ef79662..fd770d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -615,7 +615,10 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) { // replaced with other values. RemapValue(I->second); N = I->second; - assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + + // Note that it is possible to have N.getNode()->getNodeId() == NewNode at + // this point because it is possible for a node to be put in the map before + // being processed. } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e2597d6..63e9af3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -653,7 +653,7 @@ private: /// loads to load a vector with a resulting wider type. It takes /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD); /// GenWidenVectorExtLoads - Helper function to generate a set of extension @@ -661,20 +661,20 @@ private: /// LdChain: list of chains for the load to be generated. 
/// Ld: load to widen /// ExtType: extension element type - SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of /// stores to store a truncate widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 1ede3dc..96f6143 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -80,9 +80,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -115,7 +116,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getIntPtrConstant(i))); + CastInOp, DAG.getConstant(i, + TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -161,7 +163,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. @@ -227,10 +229,6 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - // Make sure the type of Idx is big enough to hold the new values. 
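A substitution that recurs through the rest of the patch: vector element indices move from DAG.getIntPtrConstant(i) to DAG.getConstant(i, TLI.getVectorIdxTy()), so EXTRACT/INSERT_VECTOR_ELT indices carry one target-chosen type. That is also why the block deleted just above — hand-widening Idx to the pointer type — goes away: callers now normalize the index once, as the getZExtOrTrunc hunks earlier do. A toy version of converting at the boundary rather than at every use; VecIdxT is a stand-in name, not an LLVM type:

#include <cstdint>
#include <cstdio>

typedef uint64_t VecIdxT; // whatever the target designates as its index type

int extractElt(const int *Vec, VecIdxT Idx) { return Vec[Idx]; }

int main() {
  int V[4] = {10, 20, 30, 40};
  uint16_t Narrow = 2;                            // narrower caller-side index
  printf("%d\n", extractElt(V, (VecIdxT)Narrow)); // converted once, at entry
  return 0;
}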
- if (Idx.getValueType().bitsLT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); @@ -406,7 +404,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. @@ -495,9 +494,9 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, unsigned NumElements = Cond.getValueType().getVectorNumElements(); EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(NumElements / 2)); + DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 071a0b8..bbe11b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -551,7 +551,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, @@ -572,7 +572,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it - // using XOR AND OR. The selector bit is broadcasted. + // using XOR AND OR. The selector bit is broadcasted. EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -605,7 +605,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // What is the size of each element in the vector mask. 
EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), DAG.getConstant(0, BitTy)); @@ -755,16 +755,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index a36dca6..54380ec 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -274,16 +274,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { break; } } - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(ISD::SELECT, SDLoc(N), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { @@ -711,7 +712,8 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -753,7 +755,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getIntPtrConstant(IdxVal - LoNumElts)); + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); return; } @@ -852,14 +855,16 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); 
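ExpandSELECT and UnrollVSETCC above avoid branches by materializing the condition as an all-ones or all-zeros value and blending with the "XOR AND OR" pattern the comment names: keep A's bits where the mask is set, B's where it is clear. A scalar sketch of that blend:

#include <cstdint>
#include <cstdio>

uint32_t blend(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = Cond ? 0xffffffffu : 0u; // broadcast the selector bit
  uint32_t NotMask = Mask ^ 0xffffffffu;   // the XOR of "XOR AND OR"
  return (A & Mask) | (B & NotMask);
}

int main() {
  printf("0x%x 0x%x\n", blend(true, 0xaaaaaaaau, 0x55555555u),
                        blend(false, 0xaaaaaaaau, 0x55555555u));
  return 0; // prints 0xaaaaaaaa 0x55555555
}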
RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -881,9 +886,10 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getConstant(InNVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } if (N->getOpcode() == ISD::FP_ROUND) { @@ -994,7 +1000,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + Inputs[Input], DAG.getConstant(Idx, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1030,6 +1037,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + if (Res.getNode() == 0) { switch (N->getOpcode()) { default: @@ -1108,8 +1119,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); + SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy()); + SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy()); EVT Src0VT = Src0.getValueType(); EVT Src0EltTy = Src0VT.getVectorElementType(); EVT MaskEltTy = MaskVT.getVectorElementType(); @@ -1284,7 +1295,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getIntPtrConstant(i))); + Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); } } @@ -1333,9 +1344,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(NumElements/2)); + DAG.getConstant(NumElements/2, + TLI.getVectorIdxTy())); // Truncate them to 1/2 the element size. 
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1562,9 +1574,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1577,9 +1589,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); + InOp1, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); + InOp2, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1617,7 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + ConcatOps[OpIdx], DAG.getConstant(i, + TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -1705,7 +1720,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getIntPtrConstant(0)); + InOp, DAG.getConstant(0, + TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -1720,7 +1736,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -1871,7 +1887,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); // Build a vector with undefined for the new nodes. EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. + EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -1945,7 +1964,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2003,7 +2022,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. 
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2019,7 +2038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2062,7 +2081,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(IdxVal+i)); + DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2289,7 +2308,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + DAG.getConstant(i, TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2310,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } } @@ -2338,7 +2357,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2393,7 +2412,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + ResVT, WideSETCC, DAG.getConstant(0, + TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2464,8 +2484,9 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // LDOps: Load operators to build a vector type // [Start,End) the list of loads to use. static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, - SmallVector<SDValue, 16>& LdOps, + SmallVectorImpl<SDValue> &LdOps, unsigned Start, unsigned End) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); @@ -2486,12 +2507,12 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. 
// The routines chops the vector into the largest vector loads with the same @@ -2645,8 +2666,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, } SDValue -DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD, +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector // and then extended it. Instead, we unroll the load and build a new vector. @@ -2693,7 +2714,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store powers of two widths. // The routines chops the vector into the largest vector stores with the same @@ -2725,7 +2746,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -2745,7 +2766,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -2762,7 +2783,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, } void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector // and then store it. Instead, we extract each element and then store it. 
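GenWidenVectorLoads and GenWidenVectorStores, whose signatures are loosened above, both follow the strategy their comments describe: chop the access into the largest legal power-of-two pieces first, then progressively smaller pieces for the remainder. A sketch of that chopping loop, assuming 16 bytes is the widest legal access:

#include <cstdio>

void chopStores(unsigned Bytes) {
  unsigned Offset = 0;
  for (unsigned Chunk = 16; Chunk != 0; Chunk /= 2)
    while (Offset + Chunk <= Bytes) { // largest piece that still fits
      printf("store %2u bytes at offset %2u\n", Chunk, Offset);
      Offset += Chunk;
    }
}

int main() {
  chopStores(28); // 16 @ 0, 8 @ 16, 4 @ 24
  return 0;
}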
@@ -2790,7 +2811,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align)); @@ -2799,7 +2820,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -2836,7 +2857,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -2845,7 +2866,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index d1f36cb..6c5e0ab 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -102,8 +102,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. @@ -387,7 +387,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -448,7 +448,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -467,7 +467,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector<unsigned, 4> &LRegs){ + SmallVectorImpl<unsigned> &LRegs){ if (NumLiveRegs == 0) return false; @@ -567,7 +567,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // "expensive to copy" values to break the dependency. In case even // that doesn't work, insert cross class copies. SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a7daf87..f5fe168 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -229,8 +229,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void releaseInterferences(unsigned Reg = 0); @@ -1133,9 +1133,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { /// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -1205,7 +1205,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { @@ -1227,7 +1227,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs) { + SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { if (!LiveRegDefs[i]) continue; @@ -1252,7 +1252,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
bool ScheduleDAGRRList:: -DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { if (NumLiveRegs == 0) return false; @@ -1331,7 +1331,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { SUnit *SU = Interferences[i-1]; LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { - SmallVector<unsigned, 4> &LRegs = LRegsPos->second; + SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) continue; } @@ -1385,7 +1385,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // to resolve it. for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { SUnit *TrySU = Interferences[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. @@ -1433,7 +1433,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // insert cross class copies. // If it's not too expensive, i.e. cost != -1, issue copies. SUnit *TrySU = Interferences[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; @@ -2401,7 +2401,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; + static const char *const PhysRegMsg[] = { " has no physreg", + " defines a physreg" }; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index ad06473..982dcc9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -700,11 +700,10 @@ namespace { } /// ProcessSDDbgValues - Process SDDbgValues associated with this node. -static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - DenseMap<SDValue, unsigned> &VRBaseMap, - unsigned Order) { +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -731,11 +730,11 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // ProcessSourceNode - Process nodes with source order numbers. These are added // to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. 
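The signature churn across ScheduleDAGFast.cpp and ScheduleDAGRRList.cpp is one idea applied repeatedly: helpers that took SmallVector<T, N>& now take SmallVectorImpl<T>&, the size-erased base class, so one definition serves callers with any inline capacity instead of hard-coding the 2s and 4s of the old signatures. A stripped-down analogue of that class split — growth and error handling are omitted, and this is not LLVM's implementation:

#include <cstdio>

template <typename T> class SmallVecImpl {
protected:
  T *Buf;
  unsigned Len, Cap;
  SmallVecImpl(T *B, unsigned C) : Buf(B), Len(0), Cap(C) {}
public:
  void push_back(const T &V) { if (Len < Cap) Buf[Len++] = V; }
  unsigned size() const { return Len; }
};

template <typename T, unsigned N> class SmallVec : public SmallVecImpl<T> {
  T Storage[N]; // inline capacity lives only in the derived class
public:
  SmallVec() : SmallVecImpl<T>(Storage, N) {}
};

// Written once against the base, callable with any inline capacity:
void collect(SmallVecImpl<unsigned> &LRegs) {
  LRegs.push_back(1);
  LRegs.push_back(2);
}

int main() {
  SmallVec<unsigned, 4> A;
  SmallVec<unsigned, 8> B;
  collect(A);
  collect(B);
  printf("%u %u\n", A.size(), B.size()); // 2 2
  return 0;
}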
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - SmallSet<unsigned, 8> &Seen) { +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + SmallSet<unsigned, 8> &Seen) { unsigned Order = N->getIROrder(); if (!Order || !Seen.insert(Order)) { // Process any valid SDDbgValues even if node does not have any order diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index affd9e0..bc6063c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -864,14 +864,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getDataLayout()->getABITypeAlignment(Ty); + return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), - getVTList(MVT::Other)), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), UpdateListeners(0) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); @@ -972,13 +971,15 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; + const TargetLowering *TLI = TM.getTargetLowering(); + // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. - if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { - EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } @@ -1011,7 +1012,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI.getPointerTy(), isTarget); + return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); } @@ -1078,7 +1079,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
- unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits(); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1155,7 +1156,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1182,7 +1184,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1512,7 +1515,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1525,11 +1528,12 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1539,13 +1543,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI.getDataLayout(); + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, @@ -1674,6 +1679,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// processing. void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { + const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. 
@@ -1796,7 +1802,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; @@ -2108,7 +2114,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2119,6 +2125,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2203,7 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2304,7 +2311,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } @@ -3639,7 +3646,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment. const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && @@ -3923,10 +3930,12 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3934,13 +3943,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -3979,10 +3988,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3990,13 +4001,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + TLI->getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -4032,7 +4043,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; // Emit a library call. 
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4054,13 +4066,13 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), + TLI->getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } @@ -5884,7 +5896,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { ClonedDVs.push_back(Clone); } } - for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), E = ClonedDVs.end(); I != E; ++I) AddDbgValue(*I, ToNode, false); } @@ -6065,9 +6077,10 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, - SmallVector<const SDNode *, 16> &Worklist) const { +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); } else { @@ -6122,11 +6135,12 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. + const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI.getPointerTy())); + getConstant(i, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6204,8 +6218,9 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; - bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + const TargetLowering *TLI = TM.getTargetLowering(); + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; @@ -6218,11 +6233,12 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. 
const GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI->getPointerTy().getSizeInBits(); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getDataLayout()); + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 21148ae..b9f4381 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -280,7 +281,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -489,7 +490,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getIntPtrConstant(i))); + ElementVT, Val, DAG.getConstant(i, + TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -515,7 +517,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); + PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -545,10 +547,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + DAG.getConstant(i * (NumElements / NumIntermediates), + TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, DAG.getIntPtrConstant(i)); + IntermediateVT, Val, + DAG.getConstant(i, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -717,6 +721,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + if (NumZeroBits == RegSize) { + // The current value is a zero. + // Explicitly express that as it would be easier for + // optimizations to kick in. + Parts[i] = DAG.getConstant(0, RegisterVT); + continue; + } + // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; @@ -1008,7 +1020,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), + InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); @@ -1039,8 +1052,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI->getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); @@ -1049,7 +1064,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI.getPointerTy()); + return DAG.getConstant(0, TLI->getPointerTy()); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1080,7 +1095,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurSDLoc()); } - + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { SmallVector<SDValue, 4> Ops; @@ -1103,7 +1118,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(*TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1136,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI->getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1157,13 +1172,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); } @@ -1172,6 +1187,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1184,7 +1200,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1192,7 +1208,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1211,7 +1227,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, &Chains[0], NumValues); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1229,10 +1245,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1262,8 +1278,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurSDLoc(), DAG); + Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurSDLoc(), + DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1462,7 +1479,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. 
bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1536,7 +1553,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TM.getTargetLowering()->isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1608,7 +1625,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETULE); @@ -1659,7 +1676,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TM.getTargetLowering()->getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1687,9 +1704,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + const TargetLowering *TLI = TM.getTargetLowering(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1698,8 +1716,8 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), Sub, DAG.getConstant(JTH.Last - JTH.First,VT), ISD::SETUGT); @@ -1734,15 +1752,16 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, DAG.getConstant(B.First, VT)); // Check range + const TargetLowering *TLI = TM.getTargetLowering(); SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), + TLI->getSetCCResultType(*DAG.getContext(), Sub.getValueType()), Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. 
bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI->isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1754,7 +1773,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); + VT = TLI->getPointerTy(); Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } @@ -1798,18 +1817,19 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); + const TargetLowering *TLI = TM.getTargetLowering(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); @@ -1822,7 +1842,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), + TLI->getSetCCResultType(*DAG.getContext(), VT), AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1895,39 +1915,32 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getExceptionPointerRegister() == 0 && + TLI->getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(*TLI, LP.getType(), ValueVTs); + assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + // Get the two live-in registers as SDValues. The physregs have already been + // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getRoot(); - SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurSDLoc(), VTs, Ops, 1); - SDValue Chain = Op1.getValue(1); - - // Insert the EHSELECTION instruction. - VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - Ops[0] = Op1; - Ops[1] = Chain; - SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurSDLoc(), VTs, Ops, 2); - Chain = Op2.getValue(1); - Op2 = DAG.getSExtOrTrunc(Op2, getCurSDLoc(), MVT::i32); - - Ops[0] = Op1; - Ops[1] = Op2; + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[0]); + Ops[1] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[1]); + + // Merge into one. 
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Ops[0], 2); - - std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain); - setValue(&LP, RetPair.first); - DAG.setRoot(RetPair.second); + setValue(&LP, Res); } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for @@ -2029,12 +2042,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } - } } // Create a CaseBlock record representing a conditional branch to @@ -2061,7 +2073,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETCC_INVALID; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2115,7 +2127,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) + const TargetLowering *TLI = TM.getTargetLowering(); + if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2176,7 +2189,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = DestWeights.find(I->BB); - if (Itr != DestWeights.end()) + if (Itr != DestWeights.end()) Itr->second += I->ExtraWeight; else DestWeights[I->BB] = I->ExtraWeight; @@ -2196,7 +2209,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTEncoding = TLI->getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2216,8 +2229,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock* Default, + MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -2281,7 +2294,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, LSize += J->size(); RSize -= J->size(); } - if (areJTsAllowed(TLI)) { + + const TargetLowering *TLI = TM.getTargetLowering(); + if (areJTsAllowed(*TLI)) { // If our case is dense we *really* should handle it earlier! 
assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2350,8 +2365,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); + MachineBasicBlock* SwitchBB) { + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PTy = TLI->getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2362,7 +2378,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy())) return false; size_t numCmps = 0; @@ -2480,11 +2496,11 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - + /// Use a shorter form of declaration, and also /// show the we want to use CRSBuilder as Clusterifier. typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - + Clusterifier TheClusterifier; BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -2494,12 +2510,12 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, + TheClusterifier.add(i.getCaseValueEx(), SMBB, BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } - + TheClusterifier.optimize(); - + size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { @@ -2511,7 +2527,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, // Changing it to APInt based is a pretty heavy for this commit. Cases.push_back(Case(C.first.getLow().toConstantInt(), C.first.getHigh().toConstantInt(), C.second, W)); - + if (C.first.getLow() != C.first.getHigh()) // A range counts double, since it requires two compares. ++numCmps; @@ -2645,7 +2661,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2683,7 +2699,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) - setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); + setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, + getCurSDLoc(), DAG)); else setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); @@ -2699,7 +2716,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2714,13 +2731,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2748,7 +2765,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2756,7 +2773,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2764,51 +2781,52 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT DestVT = TLI->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + DAG.getTargetConstant(0, TLI->getPointerTy()))); } -void SelectionDAGBuilder::visitFPExt(const User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(const User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2816,7 +2834,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -2824,13 +2842,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -2842,23 +2860,24 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { } void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(2))); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), + getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), + TM.getTargetLowering()->getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(1))); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), + getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + TM.getTargetLowering()->getValueType(I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2879,8 +2898,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - - EVT VT = TLI.getValueType(I.getType()); + + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -3002,7 +3022,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Src = DAG.getUNDEF(VT); else Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, - Src, DAG.getIntPtrConstant(StartIdx[Input])); + Src, DAG.getConstant(StartIdx[Input], + TLI->getVectorIdxTy())); } // Calculate new mask. @@ -3028,7 +3049,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
EVT EltVT = VT.getVectorElementType(); - EVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = TLI->getVectorIdxTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3041,7 +3062,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, PtrVT)); + EltVT, Src, DAG.getConstant(Idx, IdxVT)); } Ops.push_back(Res); @@ -3061,10 +3082,11 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(*TLI, AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -3101,8 +3123,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3150,16 +3173,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. + const TargetLowering *TLI = TM.getTargetLowering(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI->getPointerTy(); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), - TLI.getPointerTy(), + TLI->getPointerTy(), DAG.getConstant(Offs, MVT::i64)); else OffsVal = DAG.getIntPtrConstant(Offs); @@ -3170,7 +3194,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); @@ -3208,14 +3232,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. 
Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering *TLI = TM.getTargetLowering(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI->getPointerTy(); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); @@ -3271,7 +3296,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3345,7 +3370,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3419,9 +3444,10 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, @@ -3431,14 +3457,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3466,9 +3492,10 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(NT, dl, @@ -3477,14 +3504,14 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3492,10 +3519,11 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); } @@ -3506,7 +3534,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3515,14 +3544,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); @@ -3536,14 +3565,15 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, @@ -3551,12 +3581,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); DAG.setRoot(OutChain); } @@ -3581,12 +3611,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering *TLI = TM.getTargetLowering(); + bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3595,7 +3626,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3633,7 +3664,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(PTy); + EVT VT = TLI->getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -4326,7 +4357,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { return 0; const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + if (Ext.getOpcode() == ISD::AssertZext || + Ext.getOpcode() == ISD::AssertSext) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); @@ -4349,20 +4381,19 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) return false; - unsigned Reg = 0; + Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI->getFrameRegister(MF); + if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + Op = MachineOperand::CreateFI(FI); - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) { + unsigned Reg; if (N.getOpcode() == ISD::CopyFromReg) Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); else @@ -4373,32 +4404,39 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (PR) Reg = PR; } + if (Reg) + Op = MachineOperand::CreateReg(Reg, false); } - if (!Reg) { + if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) - Reg = VMI->second; + Op = MachineOperand::CreateReg(VMI->second, false); } - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) if (FrameIndexSDNode *FINode = - dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { - Reg = TRI->getFrameRegister(MF); - Offset = FINode->getIndex(); - } - } + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Reg) + if (!Op) return false; - MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); - FuncInfo.ArgDbgValues.push_back(&*MIB); + // FIXME: This does not handle register-indirect values at offset 0. 
+ bool IsIndirect = Offset != 0; + if (Op->isReg()) + FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, + Op->getReg(), Offset, Variable)); + else + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + return true; } @@ -4415,6 +4453,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + const TargetLowering *TLI = TM.getTargetLowering(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4428,17 +4467,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4500,7 +4539,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) { + DIVariable DIVar(Variable); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4575,7 +4617,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - if (!DIVariable(DI.getVariable()).Verify()) + DIVariable DIVar(DI.getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgValueInst should be either null or a DIVariable."); + if (!DIVar) return 0; MDNode *Variable = DI.getVariable(); @@ -4658,16 +4703,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy()); + TLI->getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, - TLI.getPointerTy(), + TLI->getPointerTy(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - TLI.getPointerTy()), + TLI->getPointerTy()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI.getPointerTy(), + TLI->getPointerTy(), + DAG.getConstant(0, TLI->getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(), FA, Offset)); return 0; } @@ -4757,7 +4802,7 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), @@ -4769,14 +4814,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI->getValueType(I.getType()); + EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4784,12 +4829,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4815,7 +4860,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -4831,23 +4876,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, *TLI)); return 0; case Intrinsic::sqrt: case 
Intrinsic::fabs: @@ -4885,9 +4930,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return 0; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanMulAndAdd(VT)){ + TLI->isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4958,7 +5003,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return 0; @@ -4972,7 +5017,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI->getPointerTy(); SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5034,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI.getPointerTy(), + TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; } @@ -5064,7 +5109,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return 0; @@ -5075,9 +5120,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI->getPointerTy()), Args, DAG, sdl); - std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } @@ -5134,8 +5180,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SmallVector<Value *, 4> Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); - for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { + for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. @@ -5146,7 +5192,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); @@ -5156,7 +5202,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); return 0; case Intrinsic::invariant_end: // Discard region information. @@ -5182,26 +5228,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + const TargetLowering *TLI = TM.getTargetLowering(); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -5262,14 +5309,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, TLI)) + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, getCurSDLoc(), CS); - std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5282,14 +5329,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - ComputeValueVTs(TLI, PtrRetTy, PVTs); + ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); SmallVector<SDValue, 4> Values(NumValues); @@ -5320,6 +5367,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // As a special case, a null chain means that a tail call has been emitted and // the DAG root is already updated. 
HasTailCall = true; + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. + PendingExports.clear(); } else { DAG.setRoot(Result.second); } @@ -5386,7 +5437,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, + false /*nontemporal*/, false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) @@ -5451,10 +5502,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. + const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && Size->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) ActuallyDoIt = false; } @@ -5464,7 +5516,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); + EVT CallVT = TLI->getValueType(I.getType(), true); setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT)); return true; } @@ -5622,7 +5674,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + Callee = DAG.getExternalSymbol(RenameFn, + TM.getTargetLowering()->getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5817,8 +5870,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; + const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(CS); bool hasMemory = false; @@ -5843,10 +5897,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI->getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5867,7 +5921,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). 
getSimpleVT(); } @@ -5879,7 +5933,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); + CType = TLI->getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5911,11 +5965,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5928,7 +5982,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -5956,16 +6010,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -5983,7 +6037,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5994,7 +6048,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
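The mechanical change running through the hunks above swaps SelectionDAGBuilder's cached TLI reference for a const TargetLowering pointer fetched from the TargetMachine at each use (the SelectionDAGBuilder.h hunk further down removes the cached member entirely). Along the way, the vinsertf128/vextracti128 hunks also switch subvector indices from DAG.getIntPtrConstant(Idx) to DAG.getConstant(Idx, TLI->getVectorIdxTy()), decoupling vector index width from pointer width. Below is a minimal standalone sketch of why re-querying beats caching; the class names are stand-ins for illustration, not LLVM's real types:

#include <iostream>
#include <memory>

// Stand-in types, not LLVM's: the point is the ownership/lookup pattern.
struct TargetLowering {
  virtual unsigned getPointerSizeInBits() const { return 64; }
  virtual ~TargetLowering() {}
};

struct TargetMachine {
  std::unique_ptr<TargetLowering> Lowering;
  // The lowering object may be replaced (e.g. per subtarget), so clients
  // should hold the stable TargetMachine and re-query on every use.
  const TargetLowering *getTargetLowering() const { return Lowering.get(); }
};

struct Builder {
  const TargetMachine &TM; // stable for the builder's lifetime
  explicit Builder(const TargetMachine &tm) : TM(tm) {}
  void visit() const {
    // Fetched per use; a reference cached at construction time would
    // dangle once TM.Lowering is replaced below.
    const TargetLowering *TLI = TM.getTargetLowering();
    std::cout << TLI->getPointerSizeInBits() << "-bit pointers\n";
  }
};

int main() {
  TargetMachine TM;
  TM.Lowering.reset(new TargetLowering());
  Builder B(TM);
  B.visit();
  TM.Lowering.reset(new TargetLowering()); // lowering object replaced
  B.visit();                               // still safe: no cached reference
  return 0;
}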
@@ -6002,7 +6056,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + TLI->getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6025,7 +6079,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6043,7 +6097,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI.getPointerTy())); + TLI->getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6065,7 +6119,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6076,10 +6130,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // If this is an indirect operand, store through the pointer after the @@ -6096,13 +6150,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know that this register is // set. - OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - InlineAsm::Kind_RegDefEarlyClobber : - InlineAsm::Kind_RegDef, - false, - 0, - DAG, - AsmNodeOperands); + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? 
InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6134,10 +6186,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); - report_fatal_error("Cannot handle indirect register inputs!"); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; } RegsForValue MatchedRegs; @@ -6147,14 +6199,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" " type register class is not natively supported!"); - report_fatal_error("inline asm error: This value type register " - "class is not natively supported!"); + return; } } // Use the produced MatchedRegs object to @@ -6174,7 +6226,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6186,34 +6238,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, - Ops, DAG); + TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == TLI->getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6227,17 +6279,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "Don't know how to handle indirect register inputs yet " - "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); - break; + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), @@ -6276,7 +6329,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI->getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6342,8 +6395,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const DataLayout &TD = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout &TD = *TLI->getDataLayout(); + SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), TD.getABITypeAlignment(I.getType())); @@ -6566,7 +6620,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); PendingExports.push_back(Chain); @@ -6596,13 +6651,15 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); + const TargetLowering *TLI = getTargetLowering(); const DataLayout *TD = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*getTargetLowering(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. 
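A second pattern in the inline-asm hunks above: where constraint lowering fails, the code now calls Ctx.emitError(...) and returns from the visitor, instead of falling through with break or aborting the whole process via report_fatal_error. A self-contained sketch of that error-handling shape; DiagContext and lowerOutputOperand are illustrative names, not LLVM's API:

#include <iostream>
#include <string>

// Illustrative diagnostic sink; LLVMContext::emitError plays this role
// in the patch.
struct DiagContext {
  bool HadError;
  DiagContext() : HadError(false) {}
  void emitError(const std::string &Msg) {
    HadError = true;
    std::cerr << "error: " << Msg << "\n";
  }
};

// Before: diagnose, then break out and keep building a half-formed node,
// or call report_fatal_error and kill the process.
// After: diagnose and return early, leaving the caller a clean way to
// notice the failure and stop.
bool lowerOutputOperand(DiagContext &Ctx, bool RegisterAllocated) {
  if (!RegisterAllocated) {
    Ctx.emitError("couldn't allocate output register for constraint 'r'");
    return false; // early return replaces report_fatal_error()
  }
  // ... build the INLINEASM operands here ...
  return true;
}

int main() {
  DiagContext Ctx;
  lowerOutputOperand(Ctx, false);
  return Ctx.HadError ? 1 : 0;
}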
@@ -6771,7 +6828,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SDB->setValue(I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { - if (LoadSDNode *LNode = + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) @@ -6869,15 +6926,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + const TargetLowering *TLI = TM.getTargetLowering(); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } + ConstantsOut.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bff92ca..ef73c00 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===// // // The LLVM Compiler Infrastructure // @@ -278,12 +278,9 @@ private: BitTestInfo Cases; }; -public: - // TLI - This is information that describes the available target features we - // need for lowering. This indicates when operations are unavailable, - // implemented with a libcall, etc. +private: const TargetMachine &TM; - const TargetLowering &TLI; +public: SelectionDAG &DAG; const DataLayout *TD; AliasAnalysis *AA; @@ -328,7 +325,6 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), - TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index b6cc7ea..d8ee221 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -92,9 +92,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -500,8 +497,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { DIScope Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); OS << " dbg:"; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) + if (Scope) OS << Scope.getFilename(); else OS << "<unknown>"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 475017a..01da51c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -275,9 +276,9 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), TLI(tm.getTargetLowering()), + MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo(TM)), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), @@ -401,7 +402,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; - unsigned Reg = MI->getOperand(0).getReg(); + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -414,16 +416,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If Reg is live-in then update debug info to track its copy in a vreg. DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->getOperand(1).isImm(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(LDI->second, RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + LDI->second, Offset, Variable); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -442,9 +447,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (CopyUseMI) { MachineInstr *NewMI = BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + CopyUseMI->getOperand(0).getReg(), + Offset, Variable); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -825,12 +831,14 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. 
- unsigned Reg = TLI->getExceptionPointerRegister(); - if (Reg) MBB->addLiveIn(Reg); + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + if (unsigned Reg = TLI->getExceptionPointerRegister()) + FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - Reg = TLI->getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (unsigned Reg = TLI->getExceptionSelectorRegister()) + FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); } /// isFoldedOrDeadInstruction - Return true if the specified instruction is @@ -929,7 +937,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -968,6 +976,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. + FuncInfo->ExceptionPointerVirtReg = 0; + FuncInfo->ExceptionSelectorVirtReg = 0; if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); @@ -2076,7 +2086,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2086,14 +2096,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2386,7 +2396,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_SwitchOpcode: { @@ -2433,7 +2444,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI->getPointerTy(); + CaseVT = getTargetLowering()->getPointerTy(); // If the VT matches, then we will execute this case. 
if (CurNodeVT == CaseVT) @@ -2455,7 +2466,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), Opcode-OPC_CheckChild0Type)) break; continue; @@ -2463,7 +2474,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; @@ -2655,7 +2667,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = TLI->getPointerTy().SimpleTy; + if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -2757,8 +2769,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) ++NumMemRefs; @@ -2774,8 +2786,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MF->allocateMemRefsArray(NumMemRefs); MachineSDNode::mmo_iterator MemRefsPos = MemRefs; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) *MemRefsPos++ = *I; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f02e3d6..e3c6306 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -510,7 +510,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // into an AND, as we know the bits will be cleared. // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); @@ -1992,7 +1992,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 2feea59..6c826de 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -554,7 +554,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { /// _outside_ the computed minimal placement regions have been covered. /// bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks) { + SmallVectorImpl<MachineBasicBlock *> &blks) { if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { bool processThisBlock = false; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), @@ -629,7 +629,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, /// addUsesForTopLevelLoops - add uses for CSRs used inside top /// level loops to the exit blocks of those loops. /// -bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { +bool PEI::addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks) { bool addedUses = false; // Place restores for top level loops where needed. @@ -674,7 +674,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { /// multi-entry/exit regions. /// bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, + SmallVectorImpl<MachineBasicBlock *> &blks, CSRegBlockMap &prevSpills) { bool placedSpills = false; // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) @@ -736,7 +736,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, /// multi-entry/exit regions. 
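The ShrinkWrapping hunks above, and calcRestorePlacements continuing just below, widen helper signatures from SmallVector<MachineBasicBlock*, 4>& to SmallVectorImpl<MachineBasicBlock*>&. SmallVectorImpl<T> is the size-erased base class of SmallVector<T, N>, so one non-template signature serves callers regardless of the inline capacity they picked. A small sketch against LLVM's real ADT header:

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Taking SmallVectorImpl<T>& keeps the inline element count N out of the
// signature, so callers with different small sizes share one function.
static int sumAll(const SmallVectorImpl<int> &Xs) {
  int Sum = 0;
  for (unsigned i = 0, e = Xs.size(); i != e; ++i)
    Sum += Xs[i];
  return Sum;
}

int main() {
  SmallVector<int, 4> A;   // small inline buffer
  SmallVector<int, 64> B;  // larger inline buffer, same element type
  A.push_back(1);
  A.push_back(2);
  B.push_back(3);
  return sumAll(A) + sumAll(B) == 6 ? 0 : 1;
}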
 ///
 bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
-                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               SmallVectorImpl<MachineBasicBlock *> &blks,
                                CSRegBlockMap &prevRestores) {
   bool placedRestores = false;
   // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 23984e9..2fc8f46 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
 namespace {
   class SjLjEHPrepare : public FunctionPass {
-    const TargetLoweringBase *TLI;
+    const TargetMachine *TM;
     Type *FunctionContextTy;
     Constant *RegisterFn;
     Constant *UnregisterFn;
@@ -58,8 +58,8 @@ namespace {
     AllocaInst *FuncCtx;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
-      : FunctionPass(ID), TLI(tli) { }
+    explicit SjLjEHPrepare(const TargetMachine *TM)
+      : FunctionPass(ID), TM(TM) { }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
@@ -82,8 +82,8 @@ namespace {
 char SjLjEHPrepare::ID = 0;
 // Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
-  return new SjLjEHPrepare(TLI);
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
+  return new SjLjEHPrepare(TM);
 }
 // doInitialization - Set up declarations and types needed to process
 // exceptions.
@@ -190,6 +190,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
   // Create an alloca for the incoming jump buffer ptr and the new jump buffer
   // that needs to be restored on all exits from the function. This is an alloca
   // because the value needs to be added to the global context list.
+  const TargetLowering *TLI = TM->getTargetLowering();
   unsigned Align =
     TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
   FuncCtx =
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index c5bbba3..10a93b7 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -31,8 +31,8 @@
 #include "SpillPlacement.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/EdgeBundles.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -53,11 +53,16 @@ char &llvm::SpillPlacementID = SpillPlacement::ID;
 void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
+  AU.addRequired<MachineBlockFrequencyInfo>();
   AU.addRequiredTransitive<EdgeBundles>();
   AU.addRequiredTransitive<MachineLoopInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
+/// Decision threshold. A node gets the output value 0 if the weighted sum of
+/// its inputs falls in the open interval (-Threshold;Threshold).
+static const BlockFrequency Threshold = 2;
+
 /// Node - Each edge bundle corresponds to a Hopfield node.
 ///
 /// The node contains precomputed frequency data that only depends on the CFG,
@@ -68,31 +73,25 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
 /// because all weights are positive.
 ///
 struct SpillPlacement::Node {
-  /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
-  /// Ideally, these two numbers should be identical, but inaccuracies in the
-  /// block frequency estimates means that we need to normalize ingoing and
-  /// outgoing frequencies separately so they are commensurate.
-  float Scale[2];
-
-  /// Bias - Normalized contributions from non-transparent blocks.
-  /// A bundle connected to a MustSpill block has a huge negative bias,
-  /// otherwise it is a number in the range [-2;2].
-  float Bias;
+  /// BiasN - Sum of blocks that prefer a spill.
+  BlockFrequency BiasN;
+  /// BiasP - Sum of blocks that prefer a register.
+  BlockFrequency BiasP;
   /// Value - Output value of this node computed from the Bias and links.
-  /// This is always in the range [-1;1]. A positive number means the variable
-  /// should go in a register through this bundle.
-  float Value;
+  /// This is always one of the values {-1, 0, 1}. A positive number means the
+  /// variable should go in a register through this bundle.
+  int Value;
-  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+  typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
   /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
-  /// bundles. The weights are all positive and add up to at most 2, weights
-  /// from ingoing and outgoing nodes separately add up to a most 1. The weight
-  /// sum can be less than 2 when the variable is not live into / out of some
-  /// connected basic blocks.
+  /// bundles. The weights are all positive block frequencies.
   LinkVector Links;
+  /// SumLinkWeights - Cached sum of the weights of all links + Threshold.
+  BlockFrequency SumLinkWeights;
+
   /// preferReg - Return true when this node prefers to be in a register.
   bool preferReg() const {
     // Undecided nodes (Value==0) go on the stack.
@@ -101,28 +100,24 @@ struct SpillPlacement::Node {
   /// mustSpill - Return True if this node is so biased that it must spill.
   bool mustSpill() const {
-    // Actually, we must spill if Bias < sum(weights).
-    // It may be worth it to compute the weight sum here?
-    return Bias < -2.0f;
-  }
-
-  /// Node - Create a blank Node.
-  Node() {
-    Scale[0] = Scale[1] = 0;
+    // We must spill if Bias < -sum(weights) or the MustSpill flag was set.
+    // BiasN is saturated when MustSpill is set, make sure this still returns
+    // true when the RHS saturates. Note that SumLinkWeights includes Threshold.
+    return BiasN >= BiasP + SumLinkWeights;
   }
   /// clear - Reset per-query data, but preserve frequencies that only depend on
   // the CFG.
   void clear() {
-    Bias = Value = 0;
+    BiasN = BiasP = Value = 0;
+    SumLinkWeights = Threshold;
     Links.clear();
   }
   /// addLink - Add a link to bundle b with weight w.
-  /// out=0 for an ingoing link, and 1 for an outgoing link.
-  void addLink(unsigned b, float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
+  void addLink(unsigned b, BlockFrequency w) {
+    // Update cached sum.
+    SumLinkWeights += w;
     // There can be multiple links to the same bundle, add them up.
     for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
@@ -134,33 +129,48 @@ struct SpillPlacement::Node {
     Links.push_back(std::make_pair(w, b));
   }
-  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
-  /// Return the change to the total number of positive biases.
-  void addBias(float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
-    Bias += w;
+  /// addBias - Bias this node.
+ void addBias(BlockFrequency freq, BorderConstraint direction) { + switch (direction) { + default: + break; + case PrefReg: + BiasP += freq; + break; + case PrefSpill: + BiasN += freq; + break; + case MustSpill: + BiasN = BlockFrequency::getMaxFrequency(); + break; + } } /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. bool update(const Node nodes[]) { // Compute the weighted sum of inputs. - float Sum = Bias; - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) - Sum += I->first * nodes[I->second].Value; + BlockFrequency SumN = BiasN; + BlockFrequency SumP = BiasP; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { + if (nodes[I->second].Value == -1) + SumN += I->first; + else if (nodes[I->second].Value == 1) + SumP += I->first; + } - // The weighted sum is going to be in the range [-2;2]. Ideally, we should - // simply set Value = sign(Sum), but we will add a dead zone around 0 for - // two reasons: + // Each weighted sum is going to be less than the total frequency of the + // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we + // will add a dead zone around 0 for two reasons: + // // 1. It avoids arbitrary bias when all links are 0 as is possible during // initial iterations. // 2. It helps tame rounding errors when the links nominally sum to 0. - const float Thres = 1e-4f; + // bool Before = preferReg(); - if (Sum < -Thres) + if (SumN >= SumP + Threshold) Value = -1; - else if (Sum > Thres) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -177,22 +187,13 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. - BlockFrequency.resize(mf.getNumBlockIDs()); + BlockFrequencies.resize(mf.getNumBlockIDs()); + MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = LiveIntervals::getSpillWeight(true, false, - loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - BlockFrequency[Num] = Freq; - nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; + BlockFrequencies[Num] = MBFI.getBlockFreq(I); } - // Scales are reciprocal frequencies. - for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) - for (unsigned d = 0; d != 2; ++d) - if (nodes[i].Scale[d] > 0) - nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; - // We never change the function. return false; } @@ -213,12 +214,15 @@ void SpillPlacement::activate(unsigned n) { // landing pads, or loops with many 'continue' statements. It is difficult to // allocate registers when so many different blocks are involved. // - // Give a small negative bias to large bundles such that 1/32 of the - // connected blocks need to be interested before we consider expanding the - // region through the bundle. This helps compile time by limiting the number - // of blocks visited and the number of links in the Hopfield network. - if (bundles->getBlocks(n).size() > 100) - nodes[n].Bias = -0.0625f; + // Give a small negative bias to large bundles such that a substantial + // fraction of the connected blocks need to be interested before we consider + // expanding the region through the bundle. This helps compile time by + // limiting the number of blocks visited and the number of links in the + // Hopfield network. 
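The Node::update() hunk above replaces float sums and a 1e-4 epsilon with two BlockFrequency accumulators, BiasN for spill and BiasP for register, decided against a fixed Threshold dead zone; the activate() hunk continues just below. A toy, self-contained model of the new decision rule, using a plain uint64_t where LLVM's BlockFrequency adds saturating arithmetic:

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

typedef uint64_t Frequency; // stand-in for llvm::BlockFrequency (which saturates)

struct Node {
  Frequency BiasN, BiasP; // sums of spill- and register-preferring inputs
  int Value;              // -1 spill, 0 undecided, +1 register
  std::vector<std::pair<Frequency, const Node *> > Links;

  Node() : BiasN(0), BiasP(0), Value(0) {}

  // Flip only when one side wins by at least Threshold, giving a dead
  // zone around zero; returns true when the preference changed.
  bool update(Frequency Threshold) {
    Frequency SumN = BiasN, SumP = BiasP;
    for (size_t i = 0; i != Links.size(); ++i) {
      if (Links[i].second->Value == -1)
        SumN += Links[i].first;
      else if (Links[i].second->Value == 1)
        SumP += Links[i].first;
    }
    int Old = Value;
    if (SumN >= SumP + Threshold)
      Value = -1; // spill wins clearly
    else if (SumP >= SumN + Threshold)
      Value = 1;  // register wins clearly
    else
      Value = 0;  // dead zone: stay undecided
    return Value != Old;
  }
};

int main() {
  Node A, B;
  A.BiasP = 10;                                    // strongly prefers a register
  B.Links.push_back(std::make_pair(Frequency(8), (const Node *)&A));
  A.update(2);
  B.update(2);                                     // B follows A through the link
  std::cout << A.Value << " " << B.Value << "\n";  // prints: 1 1
  return 0;
}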
+ if (bundles->getBlocks(n).size() > 100) { + nodes[n].BiasP = 0; + nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + } } @@ -227,27 +231,20 @@ void SpillPlacement::activate(unsigned n) { void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(I->Number); - const float Bias[] = { - 0, // DontCare, - 1, // PrefReg, - -1, // PrefSpill - 0, // PrefBoth - -HUGE_VALF // MustSpill - }; + BlockFrequency Freq = BlockFrequencies[I->Number]; // Live-in to block? if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq, I->Entry); } // Live-out from block? if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq, I->Exit); } } } @@ -256,15 +253,15 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(*I); + BlockFrequency Freq = BlockFrequencies[*I]; if (Strong) Freq += Freq; unsigned ib = bundles->getBundle(*I, 0); unsigned ob = bundles->getBundle(*I, 1); activate(ib); activate(ob); - nodes[ib].addBias(-Freq, 1); - nodes[ob].addBias(-Freq, 0); + nodes[ib].addBias(Freq, PrefSpill); + nodes[ob].addBias(Freq, PrefSpill); } } @@ -284,9 +281,9 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { Linked.push_back(ib); if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) Linked.push_back(ob); - float Freq = getBlockFrequency(Number); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); + BlockFrequency Freq = BlockFrequencies[Number]; + nodes[ib].addLink(ob, Freq); + nodes[ob].addLink(ib, Freq); } } diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index fc412f8..105516b 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/BlockFrequency.h" namespace llvm { @@ -57,7 +58,7 @@ class SpillPlacement : public MachineFunctionPass { SmallVector<unsigned, 8> RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector<float, 4> BlockFrequency; + SmallVector<BlockFrequency, 4> BlockFrequencies; public: static char ID; // Pass identification, replacement for typeid. @@ -139,8 +140,8 @@ public: /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. 
- float getBlockFrequency(unsigned Number) const { - return BlockFrequency[Number]; + BlockFrequency getBlockFrequency(unsigned Number) const { + return BlockFrequencies[Number]; } private: diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 0a3818e..e717fac 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -325,12 +325,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - MachineDominatorTree &mdt) + MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi) : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + MBFI(mbfi), Edit(0), OpenIdx(0), SpillMode(SM_Partition), @@ -1119,7 +1121,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 4005a3d..f029c73 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -27,6 +27,7 @@ class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; class LiveRangeEdit; +class MachineBlockFrequencyInfo; class MachineInstr; class MachineLoopInfo; class MachineRegisterInfo; @@ -215,6 +216,7 @@ class SplitEditor { MachineDominatorTree &MDT; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; public: @@ -349,7 +351,7 @@ public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&); + MachineDominatorTree&, MachineBlockFrequencyInfo &); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 5f4c68b..faaa6e7 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -310,9 +310,9 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVector<const MachineBasicBlock*, 8>::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { + for (SmallVectorImpl<const MachineBasicBlock *>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 389793e..4c56380 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -33,6 +33,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include <cstdlib> using namespace llvm; STATISTIC(NumFunProtected, "Number of functions protected"); @@ -41,9 +42,11 @@ STATISTIC(NumAddrTaken, "Number of local variables that have their address" namespace { class StackProtector : public FunctionPass { + const TargetMachine *TM; + /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. 
- const TargetLoweringBase *const TLI; + const TargetLoweringBase *TLI; const Triple Trip; Function *F; @@ -51,6 +54,10 @@ namespace { DominatorTree *DT; + /// \brief The minimum size of buffers that will receive stack smashing + /// protection when -fstack-protection is used. + unsigned SSPBufferSize; + /// VisitedPHIs - The set of PHI nodes visited when determining /// if a variable's reference has been taken. This set /// is maintained to ensure we don't visit the same PHI node multiple @@ -83,12 +90,12 @@ namespace { bool RequiresStackProtector(); public: static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID), TLI(0) { + StackProtector() : FunctionPass(ID), TM(0), TLI(0), SSPBufferSize(0) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetLoweringBase *tli) - : FunctionPass(ID), TLI(tli), - Trip(tli->getTargetMachine().getTargetTriple()) { + StackProtector(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), TLI(0), Trip(TM->getTargetTriple()), + SSPBufferSize(8) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } @@ -104,17 +111,24 @@ char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, false) -FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { - return new StackProtector(tli); +FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { + return new StackProtector(TM); } bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DT = getAnalysisIfAvailable<DominatorTree>(); + TLI = TM->getTargetLowering(); if (!RequiresStackProtector()) return false; + Attribute Attr = + Fn.getAttributes().getAttribute(AttributeSet::FunctionIndex, + "stack-protector-buffer-size"); + if (Attr.isStringAttribute()) + SSPBufferSize = atoi(Attr.getValueAsString().data()); + ++NumFunProtected; return InsertStackProtectors(); } @@ -130,7 +144,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, // protector if (Strong) return true; - const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { // If we're on a non-Darwin platform or we're inside of a structure, don't // add stack protectors unless the array is a character array. @@ -140,7 +153,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. - if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) return true; } @@ -228,13 +241,14 @@ bool StackProtector::RequiresStackProtector() { if (const ConstantInt *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { - unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; - if (CI->getLimitedValue(BufferSize) >= BufferSize) + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) // A call to alloca with size >= SSPBufferSize requires // stack protectors. return true; - } else // A call to alloca with a variable size requires protectors. + } else { + // A call to alloca with a variable size requires protectors. 
return true; + } } if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) @@ -251,6 +265,46 @@ bool StackProtector::RequiresStackProtector() { return false; } +/// Insert code into the entry block that stores the __stack_chk_guard +/// variable onto the stack: +/// +/// entry: +/// StackGuardSlot = alloca i8* +/// StackGuard = load __stack_chk_guard +/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// +static void CreatePrologue(Function *F, Module *M, ReturnInst *RI, + const TargetLoweringBase *TLI, const Triple &Trip, + AllocaInst *&AI, Value *&StackGuardVar) { + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, + AddressSpace)); + } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); + cast<GlobalValue>(StackGuardVar) + ->setVisibility(GlobalValue::HiddenVisibility); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + Args, "", InsPt); +} + /// InsertStackProtectors - Insert code into the prologue and epilogue of the /// function. /// @@ -269,41 +323,7 @@ bool StackProtector::InsertStackProtectors() { if (!RI) continue; if (!FailBB) { - // Insert code into the entry block that stores the __stack_chk_guard - // variable onto the stack: - // - // entry: - // StackGuardSlot = alloca i8* - // StackGuard = load __stack_chk_guard - // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); - unsigned AddressSpace, Offset; - if (TLI->getStackCookieLocation(AddressSpace, Offset)) { - Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, - PointerType::get(PtrTy, AddressSpace)); - } else if (Trip.getOS() == llvm::Triple::OpenBSD) { - StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); - cast<GlobalValue>(StackGuardVar) - ->setVisibility(GlobalValue::HiddenVisibility); - } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); - } - - BasicBlock &Entry = F->getEntryBlock(); - Instruction *InsPt = &Entry.front(); - - AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); - LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); - - Value *Args[] = { LI, AI }; - CallInst:: - Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - Args, "", InsPt); - + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); // Create the basic block to jump to when the guard check fails. 
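In the StackProtector hunks above, the buffer-size threshold moves from the global TM.Options.SSPBufferSize to a per-function "stack-protector-buffer-size" string attribute, seeded with a default of 8 and parsed with atoi, and the prologue emission is split out into a static CreatePrologue helper. A stripped-down sketch of the size policy, with the LLVM attribute plumbing reduced to an optional string:

#include <cstdlib>
#include <string>

// Default mirrors the patch; a present attribute string overrides it.
static unsigned bufferSizeFor(const std::string *Attr) {
  unsigned SSPBufferSize = 8;                  // constructor default
  if (Attr)                                    // function carries the attribute?
    SSPBufferSize = std::atoi(Attr->c_str()); // the patch parses with atoi too
  return SSPBufferSize;
}

// An array (or alloca) at or above the threshold gets a protector.
static bool needsProtector(unsigned AllocSize, const std::string *Attr) {
  return AllocSize >= bufferSizeFor(Attr);
}

int main() {
  std::string Four = "4";
  bool Ok = !needsProtector(7, 0)        // 7 < default 8: no protector
            && needsProtector(7, &Four); // 7 >= 4: protector
  return Ok ? 0 : 1;
}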
FailBB = CreateFailBB(); } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index f951561..9f44df8 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <vector> @@ -48,13 +48,16 @@ namespace { LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; - const MachineLoopInfo *loopInfo; + const MachineBlockFrequencyInfo *MBFI; // SSIntervals - Spill slot intervals. std::vector<LiveInterval*> SSIntervals; - // SSRefs - Keep a list of frame index references for each spill slot. - SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs; + // SSRefs - Keep a list of MachineMemOperands for each spill slot. + // MachineMemOperands can be shared between instructions, so we need + // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not + // become FI0 -> FI1 -> FI2. + SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. 
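// Aside: the SSRefs comment above is the subtle part of this rewrite. A toy,
// compilable illustration of the hazard it describes (types are plain
// stand-ins): under the mapping {0 -> 1, 1 -> 2}, each reference must be
// rewritten from the slot it held *before* any renaming, or a reference
// already moved to 1 would be moved again to 2.
#include <cstddef>
#include <vector>

static void applySlotMapping(std::vector<int> &SlotOfRef,
                             const std::vector<int> &SlotMapping) {
  for (std::size_t i = 0, e = SlotOfRef.size(); i != e; ++i) {
    int OldFI = SlotOfRef[i];  // always keyed on the original slot
    int NewFI = SlotMapping[OldFI];
    if (NewFI != -1 && NewFI != OldFI)
      SlotOfRef[i] = NewFI;    // a single rewrite; renames never chain
  }
}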
SmallVector<unsigned, 16> OrigAlignments; @@ -89,8 +92,8 @@ namespace { AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveStacks>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -103,7 +106,7 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, + void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; @@ -139,7 +142,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - unsigned loopDepth = loopInfo->getLoopDepth(MBB); + BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -154,8 +157,19 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); - SSRefs[FI].push_back(MI); + li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + } + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + MachineMemOperand *MMO = *MMOI; + if (const Value *V = MMO->getValue()) { + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast<FixedStackPseudoSourceValue>(V)) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); + } + } } } } @@ -197,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { /// LiveIntervals that have already been assigned to the specified color. bool StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { - const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color]; + const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color]; for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { LiveInterval *OtherLI = OtherLIs[i]; if (OtherLI->overlaps(*li)) @@ -291,16 +305,26 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (!Changed) return false; - // Rewrite all MO_FrameIndex operands. - SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); + // Rewrite all MachineMemOperands. for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { int NewFI = SlotMapping[SS]; if (NewFI == -1 || (NewFI == (int)SS)) continue; - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; + for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) + RefMMOs[i]->setValue(NewSV); + } + + // Rewrite all MO_FrameIndex operands. Look for dead stores. 
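// Aside: ScanForSpillSlotRefs() above now weights spill-slot references by
// block frequency instead of loop depth. A compilable sketch of the two
// heuristics; the pow(10, depth) form reflects the old depth-based estimate
// of that era and is illustrative, not a quote of
// LiveIntervals::getSpillWeight.
#include <cmath>

static float weightFromLoopDepth(unsigned LoopDepth) {
  // Old: hotness guessed from nesting, 1, 10, 100, ...
  return std::pow(10.0f, static_cast<float>(LoopDepth));
}

static float weightFromBlockFreq(double RelativeBlockFreq) {
  // New: hotness taken from the (profile- or estimate-based) frequency.
  return static_cast<float>(RelativeBlockFreq);
}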
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) + RewriteInstruction(MII, SlotMapping, MF); + RemoveDeadStores(MBB); } // Delete unused stack slots. @@ -315,28 +339,24 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. -void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, - int NewFI, MachineFunction &MF) { +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, + SmallVectorImpl<int> &SlotMapping, + MachineFunction &MF) { // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) continue; - int FI = MO.getIndex(); - if (FI != OldFI) + int OldFI = MO.getIndex(); + if (OldFI < 0) + continue; + int NewFI = SlotMapping[OldFI]; + if (NewFI == -1 || NewFI == OldFI) continue; MO.setIndex(NewFI); } - // Update the memory references. This changes the MachineMemOperands - // directly. They may be in use by multiple instructions, however all - // instructions using OldFI are being rewritten to use NewFI. - const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) - if ((*I)->getValue() == OldSV) - (*I)->setValue(NewSV); + // The MachineMemOperands have already been updated. } @@ -357,10 +377,19 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; + int FirstSS, SecondSS; + if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && + FirstSS == SecondSS && + FirstSS != -1) { + ++NumDead; + changed = true; + toErase.push_back(I); + continue; + } + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; @@ -379,7 +408,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); @@ -396,7 +425,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); TII = MF.getTarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); bool Changed = false; @@ -430,10 +459,5 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { Assignments[i].clear(); Assignments.clear(); - if (Changed) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= RemoveDeadStores(I); - } - return Changed; } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 1ec8817..8a1d567 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -86,7 +86,7 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + SmallVectorImpl<std::pair<unsigned,unsigned> > 
&Copies, const DenseSet<unsigned> &UsedByPhi, bool Remove); void DuplicateInstruction(MachineInstr *MI, @@ -96,7 +96,7 @@ namespace { DenseMap<unsigned, unsigned> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, @@ -104,14 +104,14 @@ namespace { bool isSimpleBB(MachineBasicBlock *TailBB); bool canCompletelyDuplicateBB(MachineBasicBlock &BB); bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, const DenseSet<unsigned> &RegsUsedByPhi, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, MachineFunction &MF); @@ -382,13 +382,11 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. /// Remember the source register that's contributed by PredBB and update SSA /// update map. -void TailDuplicatePass::ProcessPHI(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, - const DenseSet<unsigned> &RegsUsedByPhi, - bool Remove) { +void TailDuplicatePass::ProcessPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -452,7 +450,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, /// instructions in them accordingly. 
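// Aside: every signature change in this file is one idiom. Taking
// SmallVectorImpl<T>& lets a single function body accept a SmallVector<T, N>
// of any inline size N. A minimal stand-alone mimic of why the base class
// erases N (growth logic deliberately omitted from this sketch):
template <typename T> struct VecImpl { // size-erased interface
  T *Data;
  unsigned Size;
  void push_back(const T &V) { Data[Size++] = V; } // sketch only: no growth
};

template <typename T, unsigned N> struct Vec : VecImpl<T> { // inline storage
  T Buffer[N];
  Vec() { this->Data = Buffer; this->Size = 0; }
};

static void takeAny(VecImpl<int> &Out) { Out.push_back(7); }
// usage: Vec<int, 4> A; Vec<int, 16> B; takeAny(A); takeAny(B); // both bind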
void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*,8> &Succs) { for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), SE = Succs.end(); SI != SE; ++SI) { @@ -662,9 +660,9 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - const DenseSet<unsigned> &UsedByPhi, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies) { SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), @@ -742,8 +740,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 20eb918..bb8bd42 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -668,27 +668,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// lookup, do so. Otherwise return -1. int TargetInstrInfo::computeDefOperandLatency( const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) const { + const MachineInstr *DefMI) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) return getInstrLatency(ItinData, DefMI); - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is valid, call getInstrLatency. This uses Stage latency if - // it exists before defaulting to MinLatency. - if (ItinData->SchedModel->MinLatency >= 0) - return getInstrLatency(ItinData, DefMI); - - // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. - // For empty itineraries, short-circuit the check and default to one cycle. - if (ItinData->isEmpty()) - return 1; - } - else if(ItinData->isEmpty()) + if(ItinData->isEmpty()) return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required @@ -709,10 +695,9 @@ int TargetInstrInfo::computeDefOperandLatency( unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseIdx) const { - int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI); if (DefLatency >= 0) return DefLatency; @@ -732,8 +717,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, unsigned InstrLatency = getInstrLatency(ItinData, DefMI); // Expected latency is the max of the stage latency and itinerary props.
- if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 7e7359a..07cf871 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -726,14 +726,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = Mang->getSymbol(GV); Name.append("$"); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); } return getContext().getCOFFSection(Name, Characteristics, - Selection, - Kind); + Kind, + Selection); } static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -761,15 +760,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + Kind, COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index c9c88c1..7a39a4c 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Function.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetOptions.h" @@ -21,6 +22,9 @@ using namespace llvm; bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers and then // check to see if we should eliminate all frame pointers. 
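// Aside: the hunk below moves "no-frame-pointer-elim-non-leaf" from a global
// TargetOptions flag to a per-function string attribute, with "true"/"false"
// spelled out as text. A stand-alone sketch of that convention (the map is a
// hypothetical stand-in for the AttributeSet lookup):
#include <map>
#include <string>

static bool getBoolFnAttr(const std::map<std::string, std::string> &FnAttrs,
                          const char *Name) {
  std::map<std::string, std::string>::const_iterator It = FnAttrs.find(Name);
  return It != FnAttrs.end() && It->second == "true";
}
// e.g. getBoolFnAttr(Attrs, "no-frame-pointer-elim-non-leaf")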
+ bool NoFramePointerElimNonLeaf = + MF.getFunction()->getFnAttribute("no-frame-pointer-elim-non-leaf") + .getValueAsString() == "true"; if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); @@ -49,29 +53,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -bool TargetOptions::operator==(const TargetOptions &TO) { -#define ARE_EQUAL(X) X == TO.X - return - ARE_EQUAL(UnsafeFPMath) && - ARE_EQUAL(NoInfsFPMath) && - ARE_EQUAL(NoNaNsFPMath) && - ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && - ARE_EQUAL(UseSoftFloat) && - ARE_EQUAL(NoZerosInBSS) && - ARE_EQUAL(JITEmitDebugInfo) && - ARE_EQUAL(JITEmitDebugInfoToDisk) && - ARE_EQUAL(GuaranteedTailCallOpt) && - ARE_EQUAL(DisableTailCalls) && - ARE_EQUAL(StackAlignmentOverride) && - ARE_EQUAL(RealignStack) && - ARE_EQUAL(SSPBufferSize) && - ARE_EQUAL(EnableFastISel) && - ARE_EQUAL(PositionIndependentExecutable) && - ARE_EQUAL(EnableSegmentedStacks) && - ARE_EQUAL(UseInitArray) && - ARE_EQUAL(TrapFuncName) && - ARE_EQUAL(FloatABIType) && - ARE_EQUAL(AllowFPOpFusion); -#undef ARE_EQUAL -} diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 1bf14db..64ee9d1 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -93,33 +93,10 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, // effectively means infinite latency. Since users of the TargetSchedule API // don't know how to handle this, we convert it to a very large latency that is // easy to distinguish when debugging the DAG but won't induce overflow. -static unsigned convertLatency(int Cycles) { +static unsigned capLatency(int Cycles) { return Cycles >= 0 ? Cycles : 1000; } -/// If we can determine the operand latency from the def only, without machine -/// model or itinerary lookup, do so. Otherwise return -1. -int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, - bool FindMin) const { - - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is invalid, then use the itinerary for MinLatency. If no - // itinerary exists either, then use single cycle latency. - if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { - return 1; - } - return SchedModel.MinLatency; - } - else if (!hasInstrSchedModel() && !hasInstrItineraries()) { - return TII->defaultDefLatency(&SchedModel, DefMI); - } - // ...operand lookup required - return -1; -} - /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require /// evaluation of predicates that depend on instruction operands or flags. const MCSchedClassDesc *TargetSchedModel:: @@ -177,18 +154,16 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { // Top-level API for clients that know the operand indices. 
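// Aside: inside computeOperandLatency() below, this patch guards the
// "write latency minus read advance" subtraction against unsigned
// wrap-around. The arithmetic in isolation (function name invented):
static unsigned effectiveOperandLatency(unsigned Latency, int Advance) {
  if (Advance > 0 && static_cast<unsigned>(Advance) > Latency)
    return 0;               // previously wrapped to a huge unsigned value
  return Latency - Advance; // Advance <= Latency, or non-positive
}
// e.g. effectiveOperandLatency(2, 3) now yields 0 rather than 4294967295.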
unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *DefMI, unsigned DefOperIdx, - const MachineInstr *UseMI, unsigned UseOperIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseOperIdx) const { - int DefLatency = getDefLatency(DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; + if (!hasInstrSchedModel() && !hasInstrItineraries()) + return TII->defaultDefLatency(&SchedModel, DefMI); if (hasInstrItineraries()) { int OperLatency = 0; if (UseMI) { - OperLatency = - TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, + UseMI, UseOperIdx); } else { unsigned DefClass = DefMI->getDesc().getSchedClass(); @@ -205,13 +180,11 @@ unsigned TargetSchedModel::computeOperandLatency( // hook to allow subtargets to specialize latency. This hook is only // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(&SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); return InstrLatency; } - assert(!FindMin && hasInstrSchedModel() && - "Expected a SchedModel for this cpu"); + // hasInstrSchedModel() const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); if (DefIdx < SCDesc->NumWriteLatencyEntries) { @@ -219,7 +192,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); unsigned WriteID = WLEntry->WriteResourceID; - unsigned Latency = convertLatency(WLEntry->Cycles); + unsigned Latency = capLatency(WLEntry->Cycles); if (!UseMI) return Latency; @@ -228,7 +201,10 @@ unsigned TargetSchedModel::computeOperandLatency( if (UseDesc->NumReadAdvanceEntries == 0) return Latency; unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); - return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap + return 0; + return Latency - Advance; } // If DefIdx does not exist in the model (e.g. implicit defs), then return // unit latency (defaultDefLatency may be too conservative). @@ -263,7 +239,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { // Lookup the definition's write latency in SubtargetInfo. const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); } return Latency; } @@ -274,13 +250,10 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { unsigned TargetSchedModel:: computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const { - // MinLatency == -1 is for in-order processors that always have unit - // MinLatency. MinLatency > 0 is for in-order processors with varying min - // latencies, but since this is not a RAW dep, we always use unit latency. - if (SchedModel.MinLatency != 0) + if (SchedModel.MicroOpBufferSize <= 1) return 1; - // MinLatency == 0 indicates an out-of-order processor that can dispatch + // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch // WAW dependencies in the same cycle. // Treat predication as a data dependency for out-of-order cpus. 
In-order @@ -302,7 +275,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, if (SCDesc->isValid()) { for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { - if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) return 1; } } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 7ca2bee..c52e675 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1539,7 +1539,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs = TiedOperands.begin()->second; if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first;
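// Aside: the TargetSchedule hunks above retire the old MinLatency and
// IsBuffered encodings. A compilable restatement of the two replacement
// tests, with invented helper names; the thresholds (at most 1 micro-op,
// buffer size 0) are the ones the patch checks:
static bool hasInOrderOutputLatency(int MicroOpBufferSize) {
  // At most one buffered micro-op: treat WAW (output) deps as unit latency.
  return MicroOpBufferSize <= 1;
}

static bool resourceStallsPipeline(int ProcResourceBufferSize) {
  // BufferSize == 0 replaces !IsBuffered: an unbuffered resource serializes.
  return ProcResourceBufferSize == 0;
}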