62 files changed, 3790 insertions, 1544 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 6fc83a2..1bdf312 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -7,13 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines several CodeGen-specific LLVM IR analysis utilties.
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,
-                                const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
   const Instruction *I = CS.getInstruction();
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,7 +490,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
   // longjmp on x86), it can end up causing miscompilation that has not
   // been fully understood.
   if (!Ret &&
-      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+      (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
        !isa<UnreachableInst>(Term)))
     return false;
 
@@ -509,7 +509,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
         return false;
     }
 
-  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
+                                         *DAG.getTarget().getTargetLowering());
 }
 
 bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 7feb42c..05e5c45 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -24,11 +24,13 @@ codegen_SRC_FILES := \
   GCMetadata.cpp \
   GCMetadataPrinter.cpp \
   GCStrategy.cpp \
+  GlobalMerge.cpp \
   IfConversion.cpp \
   InlineSpiller.cpp \
   InterferenceCache.cpp \
   IntrinsicLowering.cpp \
   JITCodeEmitter.cpp \
+  JumpInstrTables.cpp \
   LatencyPriorityQueue.cpp \
   LexicalScopes.cpp \
   LiveDebugVariables.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1cb0159..251f5ef 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -37,8 +37,7 @@
 using namespace llvm;
 
 ARMException::ARMException(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitCFI(false) {}
+  : EHStreamer(A), shouldEmitCFI(false) {}
 
 ARMException::~ARMException() {}
 
@@ -100,7 +99,7 @@ void ARMException::endFunction(const MachineFunction *) {
       ATS.emitHandlerData();
 
       // Emit actual exception table
-      EmitExceptionTable();
+      emitExceptionTable();
     }
   }
 
@@ -108,7 +107,7 @@ void ARMException::endFunction(const MachineFunction *) {
     ATS.emitFnEnd();
 }
 
-void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
 
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index f56eb6e..083cc0d 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -1,33 +1,33 @@
 LOCAL_PATH := $(call my-dir)
 
 codegen_asmprinter_SRC_FILES := \
-  AsmPrinter.cpp
+  AddressPool.cpp \
+  ARMException.cpp \
+  AsmPrinter.cpp \
+  AsmPrinterDwarf.cpp \
+  AsmPrinterInlineAsm.cpp \
+  DbgValueHistoryCalculator.cpp \
+  DIE.cpp \
+  DIEHash.cpp \
+  DwarfAccelTable.cpp \
+  DwarfCFIException.cpp \
+  DwarfDebug.cpp \
+  DwarfFile.cpp \
+  DwarfStringPool.cpp \
+  DwarfUnit.cpp \
+  EHStreamer.cpp \
+  ErlangGCPrinter.cpp \
+  OcamlGCPrinter.cpp \
+  Win64Exception.cpp \
+  WinCodeViewLineTables.cpp
+
+
 
 # For the host
 # =====================================================
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES :=	\
-	AddressPool.cpp \
-	AsmPrinter.cpp	\
-	AsmPrinterDwarf.cpp	\
-	AsmPrinterInlineAsm.cpp	\
-	ARMException.cpp	\
-	DbgValueHistoryCalculator.cpp \
-	DIE.cpp	\
-	DIEHash.cpp \
-	DwarfAccelTable.cpp \
-	DwarfCFIException.cpp \
-	DwarfDebug.cpp	\
-	DwarfException.cpp	\
-	DwarfFile.cpp \
-	DwarfStringPool.cpp \
-	DwarfUnit.cpp \
-	ErlangGCPrinter.cpp \
-	OcamlGCPrinter.cpp \
-	Win64Exception.cpp \
-	WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
 LOCAL_MODULE:= libLLVMAsmPrinter
 
 LOCAL_MODULE_TAGS := optional
@@ -41,27 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
 ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES :=	\
-	AddressPool.cpp \
-	AsmPrinter.cpp \
-	AsmPrinterDwarf.cpp \
-	AsmPrinterInlineAsm.cpp \
-	ARMException.cpp        \
-	DbgValueHistoryCalculator.cpp \
-	DIE.cpp \
-	DIEHash.cpp \
-	DwarfAccelTable.cpp \
-	DwarfCFIException.cpp \
-	DwarfDebug.cpp  \
-	DwarfException.cpp      \
-	DwarfFile.cpp \
-	DwarfStringPool.cpp \
-	DwarfUnit.cpp \
-	ErlangGCPrinter.cpp \
-	OcamlGCPrinter.cpp \
-	Win64Exception.cpp \
-	WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
 LOCAL_MODULE:= libLLVMAsmPrinter
 
 LOCAL_MODULE_TAGS := optional
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7de9c6d..f80fdea 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -46,7 +47,6 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -232,23 +232,23 @@ bool AsmPrinter::doInitialization(Module &M) {
     }
   }
 
-  DwarfException *DE = nullptr;
+  EHStreamer *ES = nullptr;
   switch (MAI->getExceptionHandlingType()) {
   case ExceptionHandling::None:
     break;
   case ExceptionHandling::SjLj:
   case ExceptionHandling::DwarfCFI:
-    DE = new DwarfCFIException(this);
+    ES = new DwarfCFIException(this);
     break;
   case ExceptionHandling::ARM:
-    DE = new ARMException(this);
+    ES = new ARMException(this);
     break;
-  case ExceptionHandling::Win64:
-    DE = new Win64Exception(this);
+  case ExceptionHandling::WinEH:
+    ES = new Win64Exception(this);
     break;
   }
-  if (DE)
-    Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName));
+  if (ES)
+    Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
   return false;
 }
 
@@ -709,13 +709,12 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
 }
 
 bool AsmPrinter::needsSEHMoves() {
-  return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+  return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
     MF->getFunction()->needsUnwindTableEntry();
 }
 
 void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
-  ExceptionHandling::ExceptionsType ExceptionHandlingType =
-      MAI->getExceptionHandlingType();
+  ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
   if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
       ExceptionHandlingType != ExceptionHandling::ARM)
     return;
@@ -870,6 +869,8 @@ void AsmPrinter::EmitFunctionBody() {
   OutStreamer.AddBlankLine();
 }
 
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
+
 bool AsmPrinter::doFinalization(Module &M) {
   // Emit global variables.
   for (const auto &G : M.globals())
@@ -887,6 +888,54 @@ bool AsmPrinter::doFinalization(Module &M) {
     EmitVisibility(Name, V, false);
   }
 
+  // Get information about jump-instruction tables to print.
+  JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
+
+  if (JITI && !JITI->getTables().empty()) {
+    unsigned Arch = Triple(getTargetTriple()).getArch();
+    bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
+    MCInst TrapInst;
+    TM.getInstrInfo()->getTrap(TrapInst);
+    for (const auto &KV : JITI->getTables()) {
+      uint64_t Count = 0;
+      for (const auto &FunPair : KV.second) {
+        // Emit the function labels to make this be a function entry point.
+        MCSymbol *FunSym =
+          OutContext.GetOrCreateSymbol(FunPair.second->getName());
+        OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
+        // FIXME: JumpTableInstrInfo should store information about the required
+        // alignment of table entries and the size of the padding instruction.
+        EmitAlignment(3);
+        if (IsThumb)
+          OutStreamer.EmitThumbFunc(FunSym);
+        if (MAI->hasDotTypeDotSizeDirective())
+          OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
+        OutStreamer.EmitLabel(FunSym);
+
+        // Emit the jump instruction to transfer control to the original
+        // function.
+        MCInst JumpToFun;
+        MCSymbol *TargetSymbol =
+          OutContext.GetOrCreateSymbol(FunPair.first->getName());
+        const MCSymbolRefExpr *TargetSymRef =
+          MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
+                                  OutContext);
+        TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+        OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
+        ++Count;
+      }
+
+      // Emit enough padding instructions to fill up to the next power of two.
+      // This assumes that the trap instruction takes 8 bytes or fewer.
+      uint64_t Remaining = NextPowerOf2(Count) - Count;
+      for (uint64_t C = 0; C < Remaining; ++C) {
+        EmitAlignment(3);
+        OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
+      }
+
+    }
+  }
+
   // Emit module flags.
   SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
   M.getModuleFlagsMetadata(ModuleFlags);
@@ -932,10 +981,6 @@ bool AsmPrinter::doFinalization(Module &M) {
     for (const auto &Alias : M.aliases()) {
       MCSymbol *Name = getSymbol(&Alias);
 
-      const GlobalValue *GV = Alias.getAliasee();
-      assert(!GV->isDeclaration());
-      MCSymbol *Target = getSymbol(GV);
-
       if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
         OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
       else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
@@ -947,7 +992,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 
       // Emit the directives as assignments aka .set:
       OutStreamer.EmitAssignment(Name,
-                                 MCSymbolRefExpr::Create(Target, OutContext));
+                                 lowerConstant(Alias.getAliasee(), *this));
     }
   }
 
@@ -1248,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
   }
 
   // Ignore debug and non-emitted data.  This handles llvm.compiler.used.
-  if (GV->getSection() == "llvm.metadata" ||
+  if (StringRef(GV->getSection()) == "llvm.metadata" ||
       GV->hasAvailableExternallyLinkage())
     return true;
 
@@ -1350,14 +1395,17 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   for (Structor &S : Structors) {
     const TargetLoweringObjectFile &Obj = getObjFileLowering();
     const MCSymbol *KeySym = nullptr;
-    const MCSection *KeySec = nullptr;
-    if (S.ComdatKey) {
-      KeySym = getSymbol(S.ComdatKey);
-      KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM);
+    if (GlobalValue *GV = S.ComdatKey) {
+      if (GV->hasAvailableExternallyLinkage())
+        // If the associated variable is available_externally, some other TU
+        // will provide its dynamic initializer.
+        continue;
+
+      KeySym = getSymbol(GV);
     }
     const MCSection *OutputSection =
-        (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec)
-                : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec));
+        (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+                : Obj.getStaticDtorSection(S.Priority, KeySym));
     OutStreamer.SwitchSection(OutputSection);
     if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
       EmitAlignment(Align);
@@ -1817,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
     SmallString<8> StrVal;
     CFP->getValueAPF().toString(StrVal);
 
-    CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+    if (CFP->getType())
+      CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+    else
+      AP.OutStreamer.GetCommentOS() << "Printing <null> Type";
     AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
   }
 
@@ -1830,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
 
   // PPC's long double has odd notions of endianness compared to how LLVM
   // handles it: p[0] goes first for *big* endian on PPC.
-  if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+  if (AP.TM.getDataLayout()->isBigEndian() &&
+      !CFP->getType()->isPPC_FP128Ty()) {
     int Chunk = API.getNumWords() - 1;
 
     if (TrailingBytes)
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index b4ef185..f555f21 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -10,10 +10,10 @@ add_llvm_library(LLVMAsmPrinter
   DwarfAccelTable.cpp
   DwarfCFIException.cpp
   DwarfDebug.cpp
-  DwarfException.cpp
   DwarfFile.cpp
   DwarfStringPool.cpp
   DwarfUnit.cpp
+  EHStreamer.cpp
   ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
   Win64Exception.cpp
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 6103254..a66d08e 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 #include <map>
+#include <set>
 
 #define DEBUG_TYPE "dwarfdebug"
 
@@ -110,45 +111,73 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
   RegVars.erase(I);
 }
 
-// \brief Terminate location ranges for all variables, described by registers
-// clobbered by @MI.
-static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
-                                const MachineInstr &MI,
-                                const TargetRegisterInfo *TRI,
-                                DbgValueHistoryMap &HistMap) {
+// \brief Collect all registers clobbered by @MI and insert them to @Regs.
+static void collectClobberedRegisters(const MachineInstr &MI,
+                                      const TargetRegisterInfo *TRI,
+                                      std::set<unsigned> &Regs) {
   for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || !MO.isDef() || !MO.getReg())
       continue;
-    for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
-         ++AI) {
-      unsigned RegNo = *AI;
-      clobberRegisterUses(RegVars, RegNo, HistMap, MI);
-    }
+    for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+      Regs.insert(*AI);
   }
 }
 
-// \brief Terminate the location range for all register-described variables
-// by inserting @ClobberingInstr to their history.
-static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars,
-                                    DbgValueHistoryMap &HistMap,
-                                    const MachineInstr &ClobberingInstr) {
-  for (const auto &I : RegVars)
-    for (const auto &Var : I.second)
-      HistMap.endInstrRange(Var, ClobberingInstr);
-  RegVars.clear();
+// \brief Returns the first instruction in @MBB which corresponds to
+// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
+  auto LastMI = MBB.getLastNonDebugInstr();
+  if (LastMI == MBB.end() || !LastMI->isReturn())
+    return nullptr;
+  // Assume that epilogue starts with instruction having the same debug location
+  // as the return instruction.
+  DebugLoc LastLoc = LastMI->getDebugLoc();
+  auto Res = LastMI;
+  for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
+       ++I) {
+    if (I->getDebugLoc() != LastLoc)
+      return Res;
+    Res = std::prev(I.base());
+  }
+  // If all instructions have the same debug location, assume whole MBB is
+  // an epilogue.
+  return MBB.begin();
+}
+
+// \brief Collect registers that are modified in the function body (their
+// contents is changed only in the prologue and epilogue).
+static void collectChangingRegs(const MachineFunction *MF,
+                                const TargetRegisterInfo *TRI,
+                                std::set<unsigned> &Regs) {
+  for (const auto &MBB : *MF) {
+    auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
+    bool IsInEpilogue = false;
+    for (const auto &MI : MBB) {
+      IsInEpilogue |= &MI == FirstEpilogueInst;
+      if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
+        collectClobberedRegisters(MI, TRI, Regs);
+    }
+  }
 }
 
 void calculateDbgValueHistory(const MachineFunction *MF,
                               const TargetRegisterInfo *TRI,
                               DbgValueHistoryMap &Result) {
-  RegDescribedVarsMap RegVars;
+  std::set<unsigned> ChangingRegs;
+  collectChangingRegs(MF, TRI, ChangingRegs);
 
+  RegDescribedVarsMap RegVars;
   for (const auto &MBB : *MF) {
     for (const auto &MI : MBB) {
       if (!MI.isDebugValue()) {
         // Not a DBG_VALUE instruction. It may clobber registers which describe
         // some variables.
-        clobberRegisterUses(RegVars, MI, TRI, Result);
+        std::set<unsigned> MIClobberedRegs;
+        collectClobberedRegisters(MI, TRI, MIClobberedRegs);
+        for (unsigned RegNo : MIClobberedRegs) {
+          if (ChangingRegs.count(RegNo))
+            clobberRegisterUses(RegVars, RegNo, Result, MI);
+        }
         continue;
       }
 
@@ -167,8 +196,10 @@ void calculateDbgValueHistory(const MachineFunction *MF,
     // Make sure locations for register-described variables are valid only
     // until the end of the basic block (unless it's the last basic block, in
     // which case let their liveness run off to the end of the function).
-    if (!MBB.empty() &&  &MBB != &MF->back())
-      clobberAllRegistersUses(RegVars, Result, MBB.back());
+    if (!MBB.empty() &&  &MBB != &MF->back()) {
+      for (unsigned RegNo : ChangingRegs)
+        clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+    }
   }
 }
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 30312ac..74215aa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -40,9 +40,8 @@
 using namespace llvm;
 
 DwarfCFIException::DwarfCFIException(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
-    moveTypeModule(AsmPrinter::CFI_M_None) {}
+  : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+    shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
 
 DwarfCFIException::~DwarfCFIException() {}
 
@@ -59,26 +58,16 @@ void DwarfCFIException::endModule() {
 
   unsigned PerEncoding = TLOF.getPersonalityEncoding();
 
-  if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+  if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
     return;
 
   // Emit references to all used personality functions
-  bool AtLeastOne = false;
   const std::vector<const Function*> &Personalities = MMI->getPersonalities();
   for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
     if (!Personalities[i])
       continue;
     MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
     TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
-    AtLeastOne = true;
-  }
-
-  if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
-    // This is a temporary hack to keep sections in the same order they
-    // were before. This lets us produce bit identical outputs while
-    // transitioning to CFI.
-    Asm->OutStreamer.SwitchSection(
-               const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
   }
 }
 
@@ -123,9 +112,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
       TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
   Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
 
-  Asm->OutStreamer.EmitDebugLabel
-    (Asm->GetTempSymbol("eh_func_begin",
-                        Asm->getFunctionNumber()));
+  MCSymbol *EHBegin =
+      Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+  if (Asm->MAI->useAssignmentForEHBegin()) {
+    MCContext &Ctx = Asm->OutContext;
+    MCSymbol *CurPos = Ctx.CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(CurPos);
+    Asm->OutStreamer.EmitAssignment(EHBegin,
+                                    MCSymbolRefExpr::Create(CurPos, Ctx));
+  } else {
+    Asm->OutStreamer.EmitLabel(EHBegin);
+  }
 
   // Provide LSDA information.
   if (!shouldEmitLSDA)
@@ -153,5 +150,5 @@ void DwarfCFIException::endFunction(const MachineFunction *) {
   // Map all labels and get rid of any dead landing pads.
   MMI->TidyLandingPads();
 
-  EmitExceptionTable();
+  emitExceptionTable();
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2a0615d..77860c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
                  cl::init(Default));
 
-static cl::opt<unsigned>
-DwarfVersionNumber("dwarf-version", cl::Hidden,
-                   cl::desc("Generate DWARF for dwarf version."), cl::init(0));
-
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";
 
@@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   else
     HasDwarfPubSections = DwarfPubSections == Enable;
 
+  unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
   DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
                                     : MMI->getModule()->getDwarfVersion();
 
+  Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
+
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     beginModule();
@@ -531,8 +530,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
   // shouldn't be found by lookup.
   AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
                                  DIDescriptor());
-  SPCU.applySubprogramAttributes(SP, *AbsDef);
-  SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+  SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
 
   SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
   createAndAddScopeChildren(SPCU, Scope, *AbsDef);
@@ -732,6 +730,8 @@ void DwarfDebug::beginModule() {
 
   const Module *M = MMI->getModule();
 
+  FunctionDIs = makeSubprogramMap(*M);
+
   // If module has named metadata anchors then use them, otherwise scan the
   // module using debug info finder to collect debug info.
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -784,6 +784,26 @@ void DwarfDebug::beginModule() {
   SectionMap[Asm->getObjFileLowering().getTextSection()];
 }
 
+void DwarfDebug::finishVariableDefinitions() {
+  for (const auto &Var : ConcreteVariables) {
+    DIE *VariableDie = Var->getDIE();
+    // FIXME: There shouldn't be any variables without DIEs.
+    if (!VariableDie)
+      continue;
+    // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+    // in the ConcreteVariables list, rather than looking it up again here.
+    // DIE::getUnit isn't simple - it walks parent pointers, etc.
+    DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+    assert(Unit);
+    DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+    if (AbsVar && AbsVar->getDIE()) {
+      Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+                        *AbsVar->getDIE());
+    } else
+      Unit->applyVariableAttributes(*Var, *VariableDie);
+  }
+}
+
 void DwarfDebug::finishSubprogramDefinitions() {
   const Module *M = MMI->getModule();
 
@@ -811,8 +831,7 @@ void DwarfDebug::finishSubprogramDefinitions() {
           // inlined versions during codegen.
           D = SPCU->getOrCreateSubprogramDIE(SP);
         // And attach the attributes
-        SPCU->applySubprogramAttributes(SP, *D);
-        SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+        SPCU->applySubprogramAttributesToDefinition(SP, *D);
       }
     }
   }
@@ -850,8 +869,10 @@ void DwarfDebug::collectDeadVariables() {
         for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
           DIVariable DV(Variables.getElement(vi));
           assert(DV.isVariable());
-          DbgVariable NewVar(DV, nullptr, this);
-          SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
+          DbgVariable NewVar(DV, this);
+          auto VariableDie = SPCU->constructVariableDIE(NewVar);
+          SPCU->applyVariableAttributes(NewVar, *VariableDie);
+          SPDIE->addChild(std::move(VariableDie));
         }
       }
     }
@@ -861,6 +882,8 @@ void DwarfDebug::collectDeadVariables() {
 void DwarfDebug::finalizeModuleInfo() {
   finishSubprogramDefinitions();
 
+  finishVariableDefinitions();
+
   // Collect info for variables that were optimized out.
   collectDeadVariables();
 
@@ -1017,9 +1040,9 @@ void DwarfDebug::endModule() {
     emitDebugInfoDWO();
     emitDebugAbbrevDWO();
     emitDebugLineDWO();
+    emitDebugLocDWO();
     // Emit DWO addresses.
     AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
-    emitDebugLocDWO();
   } else
     // Emit info into a debug loc section.
     emitDebugLoc();
@@ -1047,27 +1070,51 @@ void DwarfDebug::endModule() {
 }
 
 // Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
-                                              DebugLoc ScopeLoc) {
-  return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
-}
-
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
-                                              const MDNode *ScopeNode) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+                                                     DIVariable &Cleansed) {
   LLVMContext &Ctx = DV->getContext();
   // More then one inlined variable corresponds to one abstract variable.
-  DIVariable Var = cleanseInlinedVariable(DV, Ctx);
-  auto I = AbstractVariables.find(Var);
+  // FIXME: This duplication of variables when inlining should probably be
+  // removed. It's done to allow each DIVariable to describe its location
+  // because the DebugLoc on the dbg.value/declare isn't accurate. We should
+  // make it accurate then remove this duplication/cleansing stuff.
+  Cleansed = cleanseInlinedVariable(DV, Ctx);
+  auto I = AbstractVariables.find(Cleansed);
   if (I != AbstractVariables.end())
     return I->second.get();
+  return nullptr;
+}
 
-  LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
-  if (!Scope)
-    return nullptr;
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+  DIVariable Cleansed;
+  return getExistingAbstractVariable(DV, Cleansed);
+}
 
-  auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+                                        LexicalScope *Scope) {
+  auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
   addScopeVariable(Scope, AbsDbgVariable.get());
-  return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
+  AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+                                                 const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;
+
+  createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+}
+
+void
+DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
+                                                    const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;
+
+  if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+    createAbstractVariable(Cleansed, Scope);
 }
 
 // If Var is a current function argument then add it to CurrentFnArguments list.
@@ -1106,11 +1153,11 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     if (!Scope)
       continue;
 
-    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
-    DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
     RegVar->setFrameIndex(VI.Slot);
-    if (!addCurrentFnArgument(RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
+    addScopeVariable(Scope, RegVar);
   }
 }
 
@@ -1175,18 +1222,14 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
     Processed.insert(DV);
     const MachineInstr *MInsn = Ranges.front().first;
     assert(MInsn->isDebugValue() && "History must begin with debug value");
-    DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
-    DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
-    if (!addCurrentFnArgument(RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
-    if (AbsVar)
-      AbsVar->setMInsn(MInsn);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
+    addScopeVariable(Scope, RegVar);
 
     // Check if the first DBG_VALUE is valid for the rest of the function.
-    if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
-      RegVar->setMInsn(MInsn);
+    if (Ranges.size() == 1 && Ranges.front().second == nullptr)
       continue;
-    }
 
     // Handle multiple DBG_VALUE instructions describing one variable.
     RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
@@ -1205,6 +1248,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
       if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
           !Begin->getOperand(0).getReg())
         continue;
+      DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+      if (End != nullptr)
+        DEBUG(dbgs() << "\t" << *End);
+      else
+        DEBUG(dbgs() << "\tNULL\n");
 
       const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
       assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
@@ -1218,8 +1266,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
         EndLabel = getLabelBeforeInsn(std::next(I)->first);
       assert(EndLabel && "Forgot label after instruction ending a range!");
 
-      DEBUG(dbgs() << "DotDebugLoc Pair:\n"
-                   << "\t" << *Begin << "\t" << *End << "\n");
       DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
       if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
         DebugLoc.push_back(std::move(Loc));
@@ -1233,11 +1279,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
     assert(DV.isVariable());
     if (!Processed.insert(DV))
       continue;
-    if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
-      addScopeVariable(
-          Scope,
-          new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
-                          this));
+    if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
+      ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+      ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+      addScopeVariable(Scope, ConcreteVariables.back().get());
+    }
   }
 }
 
@@ -1371,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo())
     return;
 
+  auto DI = FunctionDIs.find(MF->getFunction());
+  if (DI == FunctionDIs.end())
+    return;
+
   // Grab the lexical scopes for the function, if we don't have any of those
   // then we're not going to be able to do anything.
   LScopes.initialize(*MF);
@@ -1386,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   // belongs to so that we add to the correct per-cu line table in the
   // non-asm case.
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+  // FnScope->getScopeNode() and DI->second should represent the same function,
+  // though they may not be the same MDNode due to inline functions merged in
+  // LTO where the debug info metadata still differs (either due to distinct
+  // written differences - two versions of a linkonce_odr function
+  // written/copied into two separate files, or some sub-optimal metadata that
+  // isn't structurally identical (see: file path/name info from clang, which
+  // includes the directory of the cpp file being built, even when the file name
+  // is absolute (such as an <> lookup header)))
   DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
   if (Asm->OutStreamer.hasRawTextSupport())
@@ -1440,6 +1498,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 }
 
 void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  if (addCurrentFnArgument(Var, LS))
+    return;
   SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
   DIVariable DV = Var->getVariable();
   // Variables with positive arg numbers are parameters.
@@ -1481,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
     assert(CurFn == MF);
   assert(CurFn != nullptr);
 
-  if (!MMI->hasDebugInfo() || LScopes.empty()) {
+  if (!MMI->hasDebugInfo() || LScopes.empty() ||
+      !FunctionDIs.count(MF->getFunction())) {
     // If we don't have a lexical scope for this function then there will
     // be a hole in the range information. Keep note of this by setting the
     // previously used section to nullptr.
@@ -1517,7 +1578,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
       assert(DV && DV.isVariable());
       if (!ProcessedVars.insert(DV))
         continue;
-      findAbstractVariable(DV, DV.getContext());
+      ensureAbstractVariableIsCreated(DV, DV.getContext());
     }
     constructAbstractSubprogramScopeDIE(TheCU, AScope);
   }
@@ -1536,12 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
   // DbgVariables except those that are also in AbstractVariables (since they
   // can be used cross-function)
-  for (const auto &I : ScopeVariables)
-    for (const auto *Var : I.second)
-      if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable())
-        delete Var;
   ScopeVariables.clear();
-  DeleteContainerPointers(CurrentFnArguments);
+  CurrentFnArguments.clear();
   DbgValues.clear();
   LabelsBeforeInsn.clear();
   LabelsAfterInsn.clear();
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2f5abc8..ffe4843 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MachineLocation.h"
@@ -71,16 +72,21 @@ class DbgVariable {
   DIVariable Var;             // Variable Descriptor.
   DIE *TheDIE;                // Variable DIE.
   unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
-  DbgVariable *AbsVar;        // Corresponding Abstract variable, if any.
   const MachineInstr *MInsn;  // DBG_VALUE instruction of the variable.
   int FrameIndex;
   DwarfDebug *DD;
 
 public:
-  // AbsVar may be NULL.
-  DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
-      : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV),
-        MInsn(nullptr), FrameIndex(~0), DD(DD) {}
+  /// Construct a DbgVariable from a DIVariable.
+  DbgVariable(DIVariable V, DwarfDebug *DD)
+      : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
+        FrameIndex(~0), DD(DD) {}
+
+  /// Construct a DbgVariable from a DEBUG_VALUE.
+  /// AbstractVar may be NULL.
+  DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
+      : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
+        DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}
 
   // Accessors.
   DIVariable getVariable() const { return Var; }
@@ -89,9 +95,7 @@ public:
   void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
   unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
   StringRef getName() const { return Var.getName(); }
-  DbgVariable *getAbstractVariable() const { return AbsVar; }
   const MachineInstr *getMInsn() const { return MInsn; }
-  void setMInsn(const MachineInstr *M) { MInsn = M; }
   int getFrameIndex() const { return FrameIndex; }
   void setFrameIndex(int FI) { FrameIndex = FI; }
   // Translate tag to proper Dwarf tag.
@@ -200,6 +204,7 @@ class DwarfDebug : public AsmPrinterHandler {
 
   // Collection of abstract variables.
   DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+  SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
 
   // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
   // can refer to them in spite of insertions into this list.
@@ -325,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler {
   DwarfAccelTable AccelNamespace;
   DwarfAccelTable AccelTypes;
 
+  DenseMap<const Function *, DISubprogram> FunctionDIs;
+
   MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -334,8 +341,14 @@ class DwarfDebug : public AsmPrinterHandler {
   }
 
   /// \brief Find abstract variable associated with Var.
-  DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
-  DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope);
+  DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
+                                           DIVariable &Cleansed);
+  DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
+  void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
+  void ensureAbstractVariableIsCreated(const DIVariable &Var,
+                                       const MDNode *Scope);
+  void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+                                               const MDNode *Scope);
 
   /// \brief Find DIE for the given subprogram and attach appropriate
   /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
@@ -389,6 +402,8 @@ class DwarfDebug : public AsmPrinterHandler {
   /// \brief Collect info for variables that were optimized out.
   void collectDeadVariables();
 
+  void finishVariableDefinitions();
+
   void finishSubprogramDefinitions();
 
   /// \brief Finish off debug information after all functions have been
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f792482..0440fce 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,138 +14,14 @@
 #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 
-#include "AsmPrinterHandler.h"
-#include "llvm/ADT/DenseMap.h"
+#include "EHStreamer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include <vector>
 
 namespace llvm {
-
-template <typename T> class SmallVectorImpl;
-struct LandingPadInfo;
-class MachineModuleInfo;
-class MachineInstr;
 class MachineFunction;
-class MCAsmInfo;
-class MCExpr;
-class MCSymbol;
-class Function;
 class ARMTargetStreamer;
-class AsmPrinter;
-
-//===----------------------------------------------------------------------===//
-/// DwarfException - Emits Dwarf exception handling directives.
-///
-class DwarfException : public AsmPrinterHandler {
-protected:
-  /// Asm - Target of Dwarf emission.
-  AsmPrinter *Asm;
-
-  /// MMI - Collected machine module information.
-  MachineModuleInfo *MMI;
-
-  /// SharedTypeIds - How many leading type ids two landing pads have in common.
-  static unsigned SharedTypeIds(const LandingPadInfo *L,
-                                const LandingPadInfo *R);
-
-  /// PadRange - Structure holding a try-range and the associated landing pad.
-  struct PadRange {
-    // The index of the landing pad.
-    unsigned PadIndex;
-    // The index of the begin and end labels in the landing pad's label lists.
-    unsigned RangeIndex;
-  };
-
-  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
-
-  /// ActionEntry - Structure describing an entry in the actions table.
-  struct ActionEntry {
-    int ValueForTypeID; // The value to write - may not be equal to the type id.
-    int NextAction;
-    unsigned Previous;
-  };
-
-  /// CallSiteEntry - Structure describing an entry in the call-site table.
-  struct CallSiteEntry {
-    // The 'try-range' is BeginLabel .. EndLabel.
-    MCSymbol *BeginLabel; // zero indicates the start of the function.
-    MCSymbol *EndLabel;   // zero indicates the end of the function.
-
-    // The landing pad starts at PadLabel.
-    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
-    unsigned Action;
-  };
-
-  /// ComputeActionsTable - Compute the actions table and gather the first
-  /// action index for each landing pad site.
-  unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
-                               SmallVectorImpl<ActionEntry> &Actions,
-                               SmallVectorImpl<unsigned> &FirstActions);
-
-  /// CallToNoUnwindFunction - Return `true' if this is a call to a function
-  /// marked `nounwind'. Return `false' otherwise.
-  bool CallToNoUnwindFunction(const MachineInstr *MI);
-
-  /// ComputeCallSiteTable - Compute the call-site table.  The entry for an
-  /// invoke has a try-range containing the call, a non-zero landing pad and an
-  /// appropriate action.  The entry for an ordinary call has a try-range
-  /// containing the call and zero for the landing pad and the action.  Calls
-  /// marked 'nounwind' have no entry and must not be contained in the try-range
-  /// of any entry - they form gaps in the table.  Entries must be ordered by
-  /// try-range address.
-  void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
-                            const RangeMapType &PadMap,
-                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
-                            const SmallVectorImpl<unsigned> &FirstActions);
-
-  /// EmitExceptionTable - Emit landing pads and actions.
-  ///
-  /// The general organization of the table is complex, but the basic concepts
-  /// are easy.  First there is a header which describes the location and
-  /// organization of the three components that follow.
-  ///  1. The landing pad site information describes the range of code covered
-  ///     by the try.  In our case it's an accumulation of the ranges covered
-  ///     by the invokes in the try.  There is also a reference to the landing
-  ///     pad that handles the exception once processed.  Finally an index into
-  ///     the actions table.
-  ///  2. The action table, in our case, is composed of pairs of type ids
-  ///     and next action offset.  Starting with the action index from the
-  ///     landing pad site, each type Id is checked for a match to the current
-  ///     exception.  If it matches then the exception and type id are passed
-  ///     on to the landing pad.  Otherwise the next action is looked up.  This
-  ///     chain is terminated with a next action of zero.  If no type id is
-  ///     found the frame is unwound and handling continues.
-  ///  3. Type id table contains references to all the C++ typeinfo for all
-  ///     catches in the function.  This tables is reversed indexed base 1.
-  void EmitExceptionTable();
-
-  virtual void EmitTypeInfos(unsigned TTypeEncoding);
-
-public:
-  //===--------------------------------------------------------------------===//
-  // Main entry points.
-  //
-  DwarfException(AsmPrinter *A);
-  virtual ~DwarfException();
-
-  /// endModule - Emit all exception information that should come after the
-  /// content.
-  void endModule() override;
-
-  /// beginFunction - Gather pre-function exception information.  Assumes being
-  /// emitted immediately after the function entry point.
-  void beginFunction(const MachineFunction *MF) override;
-
-  /// endFunction - Gather and emit post-function exception information.
-  void endFunction(const MachineFunction *) override;
-
-  // We don't need these.
-  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
-  void beginInstruction(const MachineInstr *MI) override {}
-  void endInstruction() override {}
-};
 
-class DwarfCFIException : public DwarfException {
+class DwarfCFIException : public EHStreamer {
   /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
   /// should be emitted.
   bool shouldEmitPersonality;
@@ -179,8 +55,8 @@ public:
   void endFunction(const MachineFunction *) override;
 };
 
-class ARMException : public DwarfException {
-  void EmitTypeInfos(unsigned TTypeEncoding) override;
+class ARMException : public EHStreamer {
+  void emitTypeInfos(unsigned TTypeEncoding) override;
   ARMTargetStreamer &getTargetStreamer();
 
   /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
@@ -206,7 +82,7 @@ public:
   void endFunction(const MachineFunction *) override;
 };
 
-class Win64Exception : public DwarfException {
+class Win64Exception : public EHStreamer {
   /// shouldEmitPersonality - Per-function flag to indicate if personality
   /// info should be emitted.
   bool shouldEmitPersonality;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index a70c0f7..9538bee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1071,6 +1071,8 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
        I != E; ++I) {
     DIScope Ctx = *I;
     StringRef Name = Ctx.getName();
+    if (Name.empty() && Ctx.isNameSpace())
+      Name = "(anonymous namespace)";
     if (!Name.empty()) {
       CS += Name;
       CS += "::";
@@ -1359,12 +1361,13 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
     return NDie;
   DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
 
-  if (!NS.getName().empty()) {
+  StringRef Name = NS.getName();
+  if (!Name.empty())
     addString(NDie, dwarf::DW_AT_name, NS.getName());
-    DD->addAccelNamespace(NS.getName(), NDie);
-    addGlobalName(NS.getName(), NDie, NS.getContext());
-  } else
-    DD->addAccelNamespace("(anonymous namespace)", NDie);
+  else
+    Name = "(anonymous namespace)";
+  DD->addAccelNamespace(Name, NDie);
+  addGlobalName(Name, NDie, NS.getContext());
   addSourceLine(NDie, NS);
   return &NDie;
 }
@@ -1382,14 +1385,14 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
     // Add subprogram definitions to the CU die directly.
     ContextDIE = &getUnitDie();
-    // Build the decl now to ensure it preceeds the definition.
+    // Build the decl now to ensure it precedes the definition.
     getOrCreateSubprogramDIE(SPDecl);
   }
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
   DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
 
-  // Abort here and fill this in later, depending on whether or not this
+  // Stop here and fill this in later, depending on whether or not this
   // subprogram turns out to have inlined instances or not.
   if (SP.isDefinition())
     return &SPDie;
@@ -1398,12 +1401,21 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   return &SPDie;
 }
 
+void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
+  DISubprogram SPDecl = SP.getFunctionDeclaration();
+  DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+  applySubprogramAttributes(SP, SPDie);
+  addGlobalName(SP.getName(), SPDie, Context);
+}
+
 void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
   DIE *DeclDie = nullptr;
   StringRef DeclLinkageName;
   if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
     DeclDie = getDIE(SPDecl);
-    assert(DeclDie);
+    assert(DeclDie && "This DIE should've already been constructed when the "
+                      "definition DIE was created in "
+                      "getOrCreateSubprogramDIE");
     DeclLinkageName = SPDecl.getLinkageName();
   }
 
@@ -1502,6 +1514,17 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
     addFlag(SPDie, dwarf::DW_AT_explicit);
 }
 
+void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
+                                        DIE &VariableDie) {
+  StringRef Name = Var.getName();
+  if (!Name.empty())
+    addString(VariableDie, dwarf::DW_AT_name, Name);
+  addSourceLine(VariableDie, Var.getVariable());
+  addType(VariableDie, Var.getType());
+  if (Var.isArtificial())
+    addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
 // Return const expression if value is a GEP to access merged global
 // constant. e.g.
 // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1665,10 +1688,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
       DD->addAccelName(GV.getLinkageName(), AddrDIE);
   }
 
-  if (!GV.isLocalToUnit())
-    addGlobalName(GV.getName(),
-                  VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
-                  GV.getContext());
+  addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
+                GV.getContext());
 }
 
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
@@ -1777,24 +1798,13 @@ std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
 
 std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
                                                          bool Abstract) {
-  StringRef Name = DV.getName();
-
   // Define variable debug information entry.
   auto VariableDie = make_unique<DIE>(DV.getTag());
-  DbgVariable *AbsVar = DV.getAbstractVariable();
-  if (AbsVar && AbsVar->getDIE())
-    addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
-  else {
-    if (!Name.empty())
-      addString(*VariableDie, dwarf::DW_AT_name, Name);
-    addSourceLine(*VariableDie, DV.getVariable());
-    addType(*VariableDie, DV.getType());
-    if (DV.isArtificial())
-      addFlag(*VariableDie, dwarf::DW_AT_artificial);
-  }
 
-  if (Abstract)
+  if (Abstract) {
+    applyVariableAttributes(DV, *VariableDie);
     return VariableDie;
+  }
 
   // Add variable address.
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index acb7528..b7b83b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -400,6 +400,8 @@ public:
   DIE *getOrCreateSubprogramDIE(DISubprogram SP);
 
   void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
+  void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+  void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
 
   /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
   /// given DIType.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 3a12c73..73f62bf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,45 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains support for writing DWARF exception info into asm files.
+// This file contains support for writing exception info into assembly files.
 //
 //===----------------------------------------------------------------------===//
 
-#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
+#include "EHStreamer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+
 using namespace llvm;
 
-DwarfException::DwarfException(AsmPrinter *A)
-  : Asm(A), MMI(Asm->MMI) {}
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
 
-DwarfException::~DwarfException() {}
+EHStreamer::~EHStreamer() {}
 
-/// SharedTypeIds - How many leading type ids two landing pads have in common.
-unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
-                                       const LandingPadInfo *R) {
+/// How many leading type ids two landing pads have in common.
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+                                   const LandingPadInfo *R) {
   const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
   unsigned LSize = LIds.size(), RSize = RIds.size();
   unsigned MinSize = LSize < RSize ? LSize : RSize;
@@ -58,10 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
   return Count;
 }
 
-/// ComputeActionsTable - Compute the actions table and gather the first action
-/// index for each landing pad site.
-unsigned DwarfException::
-ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+unsigned EHStreamer::
+computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
                     SmallVectorImpl<ActionEntry> &Actions,
                     SmallVectorImpl<unsigned> &FirstActions) {
 
@@ -109,7 +95,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
          I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
     const LandingPadInfo *LPI = *I;
     const std::vector<int> &TypeIds = LPI->TypeIds;
-    unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+    unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
     unsigned SizeSiteActions = 0;
 
     if (NumShared < TypeIds.size()) {
@@ -167,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
   return SizeActions;
 }
 
-/// CallToNoUnwindFunction - Return `true' if this is a call to a function
-/// marked `nounwind'. Return `false' otherwise.
-bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
   assert(MI->isCall() && "This should be a call instruction!");
 
   bool MarkedNoUnwind = false;
@@ -201,15 +187,14 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
   return MarkedNoUnwind;
 }
 
-/// ComputeCallSiteTable - Compute the call-site table.  The entry for an invoke
-/// has a try-range containing the call, a non-zero landing pad, and an
-/// appropriate action.  The entry for an ordinary call has a try-range
-/// containing the call and zero for the landing pad and the action.  Calls
-/// marked 'nounwind' have no entry and must not be contained in the try-range
-/// of any entry - they form gaps in the table.  Entries must be ordered by
-/// try-range address.
-void DwarfException::
-ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+/// Compute the call-site table.  The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action.  The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action.  Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table.  Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
                      const RangeMapType &PadMap,
                      const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      const SmallVectorImpl<unsigned> &FirstActions) {
@@ -228,7 +213,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
     for (const auto &MI : MBB) {
       if (!MI.isEHLabel()) {
         if (MI.isCall())
-          SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
+          SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
         continue;
       }
 
@@ -308,7 +293,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
   }
 }
 
-/// EmitExceptionTable - Emit landing pads and actions.
+/// Emit landing pads and actions.
 ///
 /// The general organization of the table is complex, but the basic concepts are
 /// easy.  First there is a header which describes the location and organization
@@ -328,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 ///     unwound and handling continues.
 ///  3. Type ID table contains references to all the C++ typeinfo for all
 ///     catches in the function.  This tables is reverse indexed base 1.
-void DwarfException::EmitExceptionTable() {
+void EHStreamer::emitExceptionTable() {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
   const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -350,7 +335,8 @@ void DwarfException::EmitExceptionTable() {
   // landing pad site.
   SmallVector<ActionEntry, 32> Actions;
   SmallVector<unsigned, 64> FirstActions;
-  unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+  unsigned SizeActions =
+    computeActionsTable(LandingPads, Actions, FirstActions);
 
   // Invokes and nounwind calls have entries in PadMap (due to being bracketed
   // by try-range labels when lowered).  Ordinary calls do not, so appropriate
@@ -368,7 +354,7 @@ void DwarfException::EmitExceptionTable() {
 
   // Compute the call-site table.
   SmallVector<CallSiteEntry, 64> CallSites;
-  ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+  computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
 
   // Final tallies.
 
@@ -657,12 +643,12 @@ void DwarfException::EmitExceptionTable() {
     Asm->EmitSLEB128(Action.NextAction);
   }
 
-  EmitTypeInfos(TTypeEncoding);
+  emitTypeInfos(TTypeEncoding);
 
   Asm->EmitAlignment(2);
 }
 
-void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
 
@@ -703,19 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
   }
 }
 
-/// endModule - Emit all exception information that should come after the
-/// content.
-void DwarfException::endModule() {
+/// Emit all exception information that should come after the content.
+void EHStreamer::endModule() {
   llvm_unreachable("Should be implemented");
 }
 
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
-void DwarfException::beginFunction(const MachineFunction *MF) {
+/// Gather pre-function exception information. Assumes it's being emitted
+/// immediately after the function entry point.
+void EHStreamer::beginFunction(const MachineFunction *MF) {
   llvm_unreachable("Should be implemented");
 }
 
-/// endFunction - Gather and emit post-function exception information.
-void DwarfException::endFunction(const MachineFunction *) {
+/// Gather and emit post-function exception information.
+void EHStreamer::endFunction(const MachineFunction *) {
   llvm_unreachable("Should be implemented");
 }
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
new file mode 100644
index 0000000..2b6ba78
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -0,0 +1,138 @@
+//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+
+#include "AsmPrinterHandler.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineInstr;
+class MachineFunction;
+class AsmPrinter;
+
+template <typename T>
+class SmallVectorImpl;
+
+/// Emits exception handling directives.
+class EHStreamer : public AsmPrinterHandler {
+protected:
+  /// Target of directive emission.
+  AsmPrinter *Asm;
+
+  /// Collected machine module information.
+  MachineModuleInfo *MMI;
+
+  /// How many leading type ids two landing pads have in common.
+  static unsigned sharedTypeIDs(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+  /// Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    unsigned Previous;
+  };
+
+  /// Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
+
+    // The landing pad starts at PadLabel.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  /// Compute the actions table and gather the first action index for each
+  /// landing pad site.
+  unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+                               SmallVectorImpl<ActionEntry> &Actions,
+                               SmallVectorImpl<unsigned> &FirstActions);
+
+  /// Return `true' if this is a call to a function marked `nounwind'. Return
+  /// `false' otherwise.
+  bool callToNoUnwindFunction(const MachineInstr *MI);
+
+  /// Compute the call-site table.  The entry for an invoke has a try-range
+  /// containing the call, a non-zero landing pad and an appropriate action.
+  /// The entry for an ordinary call has a try-range containing the call and
+  /// zero for the landing pad and the action.  Calls marked 'nounwind' have
+  /// no entry and must not be contained in the try-range of any entry - they
+  /// form gaps in the table.  Entries must be ordered by try-range address.
+
+  void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                            const RangeMapType &PadMap,
+                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                            const SmallVectorImpl<unsigned> &FirstActions);
+
+  /// Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy.  First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try.  In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try.  There is also a reference to the landing
+  ///     pad that handles the exception once processed.  Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset.  Starting with the action index from the
+  ///     landing pad site, each type Id is checked for a match to the current
+  ///     exception.  If it matches then the exception and type id are passed
+  ///     on to the landing pad.  Otherwise the next action is looked up.  This
+  ///     chain is terminated with a next action of zero.  If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. Type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function.  This tables is reversed indexed base 1.
+  void emitExceptionTable();
+
+  virtual void emitTypeInfos(unsigned TTypeEncoding);
+
+public:
+  EHStreamer(AsmPrinter *A);
+  virtual ~EHStreamer();
+
+  /// Emit all exception information that should come after the content.
+  void endModule() override;
+
+  /// Gather pre-function exception information.  Assumes being emitted
+  /// immediately after the function entry point.
+  void beginFunction(const MachineFunction *MF) override;
+
+  /// Gather and emit post-function exception information.
+  void endFunction(const MachineFunction *) override;
+
+  // Unused.
+  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+  void beginInstruction(const MachineInstr *MI) override {}
+  void endInstruction() override {}
+};
+}
+
+#endif
+
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 17d8bff..81285d5 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -38,9 +38,8 @@
 using namespace llvm;
 
 Win64Exception::Win64Exception(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
-    {}
+  : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+    shouldEmitMoves(false) {}
 
 Win64Exception::~Win64Exception() {}
 
@@ -73,14 +72,14 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
 
-  Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+  Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym);
 
   if (!shouldEmitPersonality)
     return;
 
-  MCSymbol *GCCHandlerSym =
-    Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
-  Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+  const MCSymbol *PersHandlerSym =
+      TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+  Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
                                                 Asm->getFunctionNumber()));
@@ -99,17 +98,10 @@ void Win64Exception::endFunction(const MachineFunction *) {
   MMI->TidyLandingPads();
 
   if (shouldEmitPersonality) {
-    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-    const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
-    const MCSymbol *Sym =
-        TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
     Asm->OutStreamer.PushSection();
-    Asm->OutStreamer.EmitWin64EHHandlerData();
-    Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
-                               4);
-    EmitExceptionTable();
+    Asm->OutStreamer.EmitWinEHHandlerData();
+    emitExceptionTable();
     Asm->OutStreamer.PopSection();
   }
-  Asm->OutStreamer.EmitWin64EHEndProc();
+  Asm->OutStreamer.EmitWinCFIEndProc();
 }
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 2212941..6a5c431 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
     return;
 
   const Function *GV = MF->getFunction();
-  assert(FnDebugInfo.count(GV) == true);
+  assert(FnDebugInfo.count(GV));
   assert(CurFn == &FnDebugInfo[GV]);
 
   if (CurFn->Instrs.empty()) {
diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
index d995333..421946d 100644
--- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
@@ -21,17 +21,19 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "arm-atomic-expand"
 
 namespace {
   class AtomicExpandLoadLinked : public FunctionPass {
-    const TargetLowering *TLI;
+    const TargetMachine *TM;
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
-      : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
+      : FunctionPass(ID), TM(TM) {
       initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
     }
 
@@ -50,29 +52,16 @@ namespace {
 
 char AtomicExpandLoadLinked::ID = 0;
 char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
-
-static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
-  PassInfo *PI = new PassInfo(
-      "Expand Atomic calls in terms of load-linked & store-conditional",
-      "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
-      PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
-      false, PassInfo::TargetMachineCtor_t(
-                 callTargetMachineCtor<AtomicExpandLoadLinked>));
-  Registry.registerPass(*PI, true);
-  return PI;
-}
-
-void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
-  CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
-}
-
+INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
+    "Expand Atomic calls in terms of load-linked & store-conditional",
+    false, false)
 
 FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
   return new AtomicExpandLoadLinked(TM);
 }
 
 bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
-  if (!TLI)
+  if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
     return false;
 
   SmallVector<Instruction *, 1> AtomicInsts;
@@ -89,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
 
   bool MadeChange = false;
   for (Instruction *Inst : AtomicInsts) {
-    if (!TLI->shouldExpandAtomicInIR(Inst))
+    if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst))
       continue;
 
     if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
@@ -111,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
   // Load instructions don't actually need a leading fence, even in the
   // SequentiallyConsistent case.
   AtomicOrdering MemOpOrder =
-    TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+      TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic
+                                                          : LI->getOrdering();
 
   // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
   // an ldrexd (A3.5.3).
   IRBuilder<> Builder(LI);
-  Value *Val =
-      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+  Value *Val = TM->getTargetLowering()->emitLoadLinked(
+      Builder, LI->getPointerOperand(), MemOpOrder);
 
   insertTrailingFence(Builder, LI->getOrdering());
 
@@ -178,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *Loaded =
+      TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
 
   Value *NewVal;
   switch (AI->getOperation()) {
@@ -195,7 +186,7 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
     NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
     break;
   case AtomicRMWInst::Nand:
-    NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                                "new");
     break;
   case AtomicRMWInst::Or:
@@ -224,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
     llvm_unreachable("Unknown atomic op");
   }
 
-  Value *StoreSuccess =
-      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+  Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
+      Builder, NewVal, Addr, MemOpOrder);
   Value *TryAgain = Builder.CreateICmpNE(
       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
@@ -256,19 +247,26 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //     %loaded = @load.linked(%addr)
   //     %should_store = icmp eq %loaded, %desired
   //     br i1 %should_store, label %cmpxchg.trystore,
-  //                          label %cmpxchg.end/%cmpxchg.barrier
+  //                          label %cmpxchg.failure
   // cmpxchg.trystore:
   //     %stored = @store_conditional(%new, %addr)
-  //     %try_again = icmp i32 ne %stored, 0
-  //     br i1 %try_again, label %loop, label %cmpxchg.end
-  // cmpxchg.barrier:
+  //     %success = icmp eq i32 %stored, 0
+  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+  // cmpxchg.success:
+  //     fence?
+  //     br label %cmpxchg.end
+  // cmpxchg.failure:
   //     fence?
   //     br label %cmpxchg.end
   // cmpxchg.end:
+  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
   //     [...]
   BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
-  auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
-  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
+  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
   auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
 
   // This grabs the DebugLoc from CI
@@ -284,37 +282,82 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *Loaded =
+      TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
   Value *ShouldStore =
       Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
 
   // If the the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
   Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
 
   Builder.SetInsertPoint(TryStoreBB);
-  Value *StoreSuccess = TLI->emitStoreConditional(
+  Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
       Builder, CI->getNewValOperand(), Addr, MemOpOrder);
-  Value *TryAgain = Builder.CreateICmpNE(
+  StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
-  Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
+  Builder.CreateCondBr(StoreSuccess, SuccessBB,
+                       CI->isWeak() ? FailureBB : LoopBB);
 
-  // Finally, make sure later instructions don't get reordered with a fence if
-  // necessary.
-  Builder.SetInsertPoint(BarrierBB);
+  // Make sure later instructions don't get reordered with a fence if necessary.
+  Builder.SetInsertPoint(SuccessBB);
   insertTrailingFence(Builder, SuccessOrder);
   Builder.CreateBr(ExitBB);
 
-  CI->replaceAllUsesWith(Loaded);
-  CI->eraseFromParent();
+  Builder.SetInsertPoint(FailureBB);
+  insertTrailingFence(Builder, FailureOrder);
+  Builder.CreateBr(ExitBB);
+
+  // Finally, we have control-flow based knowledge of whether the cmpxchg
+  // succeeded or not. We expose this to later passes by converting any
+  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
 
+  // Setup the builder so we can create any PHIs we need.
+  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+  // Look for any users of the cmpxchg that are just comparing the loaded value
+  // against the desired one, and replace them with the CFG-derived version.
+  SmallVector<ExtractValueInst *, 2> PrunedInsts;
+  for (auto User : CI->users()) {
+    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+    if (!EV)
+      continue;
+
+    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+           "weird extraction from { iN, i1 }");
+
+    if (EV->getIndices()[0] == 0)
+      EV->replaceAllUsesWith(Loaded);
+    else
+      EV->replaceAllUsesWith(Success);
+
+    PrunedInsts.push_back(EV);
+  }
+
+  // We can remove the instructions now we're no longer iterating through them.
+  for (auto EV : PrunedInsts)
+    EV->eraseFromParent();
+
+  if (!CI->use_empty()) {
+    // Some use of the full struct return that we don't understand has happened,
+    // so we've got to reconstruct it properly.
+    Value *Res;
+    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+    Res = Builder.CreateInsertValue(Res, Success, 1);
+
+    CI->replaceAllUsesWith(Res);
+  }
+
+  CI->eraseFromParent();
   return true;
 }
 
 AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
                                                        AtomicOrdering Ord) {
-  if (!TLI->getInsertFencesForAtomic())
+  if (!TM->getTargetLowering()->getInsertFencesForAtomic())
     return Ord;
 
   if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
@@ -327,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
 
 void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
                                               AtomicOrdering Ord) {
-  if (!TLI->getInsertFencesForAtomic())
+  if (!TM->getTargetLowering()->getInsertFencesForAtomic())
     return;
 
   if (Ord == Acquire || Ord == AcquireRelease)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 7f31b1a..b2737bf 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
   /// are set if the result needs to be inserted and/or extracted from vectors.
   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
 
+  /// Estimate the cost overhead of SK_Alternate shuffle.
+  unsigned getAltShuffleOverhead(Type *Ty) const;
+
   const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
 
 public:
@@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   return OpCost;
 }
 
+unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
+  assert(Ty->isVectorTy() && "Can only shuffle vectors");
+  unsigned Cost = 0;
+  // Shuffle cost is equal to the cost of extracting element from its argument
+  // plus the cost of inserting them onto the result vector.
+
+  // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index
+  // 0 of first vector, index 1 of second vector,index 2 of first vector and
+  // finally index 3 of second vector and insert them at index <0,1,2,3> of
+  // result vector.
+  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+    Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+    Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+  }
+  return Cost;
+}
+
 unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                   Type *SubTp) const {
+  if (Kind == SK_Alternate) {
+    return getAltShuffleOverhead(Tp);
+  }
   return 1;
 }
 
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index f623a48..7503e57 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1505,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
     if (MO.isUse()) {
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
         Uses.insert(*AI);
-    } else if (!MO.isDead())
-      // Don't try to hoist code in the rare case the terminator defines a
-      // register that is later used.
-      return MBB->end();
+    } else {
+      if (!MO.isDead())
+        // Don't try to hoist code in the rare case the terminator defines a
+        // register that is later used.
+        return MBB->end();
+
+      // If the terminator defines a register, make sure we don't hoist
+      // the instruction whose def might be clobbered by the terminator.
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        Defs.insert(*AI);
+    }
   }
 
   if (Uses.empty())
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0b492a9..57c24e8 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,11 +22,13 @@ add_llvm_library(LLVMCodeGen
   GCMetadata.cpp
   GCMetadataPrinter.cpp
   GCStrategy.cpp
+  GlobalMerge.cpp
   IfConversion.cpp
   InlineSpiller.cpp
   InterferenceCache.cpp
   IntrinsicLowering.cpp
   JITCodeEmitter.cpp
+  JumpInstrTables.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
   LexicalScopes.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 6aa60c6..ccac40c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -151,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
 }
 
 char CodeGenPrepare::ID = 0;
-static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
-  initializeTargetLibraryInfoPass(Registry);
-  PassInfo *PI = new PassInfo(
-      "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
-      PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
-      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
-  Registry.registerPass(*PI, true);
-  return PI;
-}
-
-void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
-  CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
-}
+INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+                   "Optimize for code generation", false, false)
 
 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
   return new CodeGenPrepare(TM);
@@ -1078,8 +1067,11 @@ void ExtAddrMode::print(raw_ostream &OS) const {
     NeedPlus = true;
   }
 
-  if (BaseOffs)
-    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+  if (BaseOffs) {
+    OS << (NeedPlus ? " + " : "")
+       << BaseOffs;
+    NeedPlus = true;
+  }
 
   if (BaseReg) {
     OS << (NeedPlus ? " + " : "")
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 822636f..d3ffcc7 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -146,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
 
 void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
   // It's not safe to change register allocation for source operands of
-  // that have special allocation requirements. Also assume all registers
-  // used in a call must not be changed (ABI).
+  // instructions that have special allocation requirements. Also assume all
+  // registers used in a call must not be changed (ABI).
   // FIXME: The issue with predicated instruction is more complex. We are being
   // conservative here because the kill markers cannot be trusted after
   // if-conversion:
@@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
     if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
       RegRefs.insert(std::make_pair(Reg, &MO));
 
+    // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+    // it or any of its sub or super regs. We need to use KeepRegs to mark the
+    // reg because not all uses of the same reg within an instruction are
+    // necessarily tagged as tied.
+    // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+    // def register but not the second (see PR20020 for details).
+    // FIXME: can this check be relaxed to account for undef uses
+    // of a register? In the above 'xor' example, the uses of %eax are undef, so
+    // earlier instructions could still replace %eax even though the 'xor'
+    // itself can't be changed.
+    if (MI->isRegTiedToUseOperand(i) &&
+        Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs) {
+        KeepRegs.set(*SubRegs);
+      }
+      for (MCSuperRegIterator SuperRegs(Reg, TRI);
+           SuperRegs.isValid(); ++SuperRegs) {
+        KeepRegs.set(*SuperRegs);
+      }
+    }
+
     if (MO.isUse() && Special) {
       if (!KeepRegs.test(Reg)) {
         for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
       unsigned Reg = MO.getReg();
       if (Reg == 0) continue;
       if (!MO.isDef()) continue;
+
+      // If we've already marked this reg as unchangeable, carry on.
+      if (KeepRegs.test(Reg)) continue;
+      
       // Ignore two-addr defs.
       if (MI->isRegTiedToUseOperand(i)) continue;
 
+      // FIXME: we should use a SubRegIterator that includes self (as above), so
+      // we don't have to repeat all this code for the reg itself.
       DefIndices[Reg] = Count;
       KillIndices[Reg] = ~0u;
       assert(((KillIndices[Reg] == ~0u) !=
@@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 
     RegRefs.insert(std::make_pair(Reg, &MO));
 
+    // FIXME: we should use an MCRegAliasIterator that includes self so we don't
+    // have to repeat all this code for the reg itself.
+    
     // It wasn't previously live but now it is, this is a kill.
     if (KillIndices[Reg] == ~0u) {
       KillIndices[Reg] = Count;
@@ -309,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 // the two-address instruction also defines NewReg, as may happen with
 // pre/postincrement loads. In this case, both the use and def operands are in
 // RegRefs because the def is inserted by PrescanInstruction and not erased
-// during ScanInstruction. So checking for an instructions with definitions of
+// during ScanInstruction. So checking for an instruction with definitions of
 // both NewReg and AntiDepReg covers it.
 bool
 CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
@@ -325,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
     if (RefOper->isDef() && RefOper->isEarlyClobber())
       return true;
 
-    // Handle cases in which this instructions defines NewReg.
+    // Handle cases in which this instruction defines NewReg.
     MachineInstr *MI = RefOper->getParent();
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       const MachineOperand &CheckOper = MI->getOperand(i);
@@ -343,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
         return true;
 
       // Don't allow an instruction using AntiDepReg to be earlyclobbered by
-      // NewReg
+      // NewReg.
       if (CheckOper.isEarlyClobber())
         return true;
 
-      // Don't allow inline asm to define NewReg at all. Who know what it's
+      // Don't allow inline asm to define NewReg at all. Who knows what it's
       // doing with it.
       if (MI->isInlineAsm())
         return true;
@@ -494,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
   // as we go to help determine which registers are available.
   unsigned Broken = 0;
   unsigned Count = InsertPosIndex - 1;
-  for (MachineBasicBlock::iterator I = End, E = Begin;
-       I != E; --Count) {
+  for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
     MachineInstr *MI = --I;
     if (MI->isDebugValue())
       continue;
@@ -526,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
             // Don't break anti-dependencies on non-allocatable registers.
             AntiDepReg = 0;
           else if (KeepRegs.test(AntiDepReg))
-            // Don't break anti-dependencies if an use down below requires
+            // Don't break anti-dependencies if a use down below requires
             // this exact register.
             AntiDepReg = 0;
           else {
@@ -564,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
     // If MI's defs have a special allocation requirement, don't allow
     // any def registers to be changed. Also assume all registers
     // defined in a call must not be changed (ABI).
-    if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
-        TII->isPredicated(MI))
+    if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
       // If this instruction's defs have special allocation requirement, don't
       // break this anti-dependency.
       AntiDepReg = 0;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 1949a48..45e4ff5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -55,12 +55,12 @@ class TargetRegisterInfo;
     typedef std::multimap<unsigned, MachineOperand *>::const_iterator
       RegRefIter;
 
-    /// KillIndices - The index of the most recent kill (proceding bottom-up),
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
     /// or ~0u if the register is not live.
     std::vector<unsigned> KillIndices;
 
-    /// DefIndices - The index of the most recent complete def (proceding bottom
-    /// up), or ~0u if the register is live.
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
     std::vector<unsigned> DefIndices;
 
     /// KeepRegs - A set of registers which are live and cannot be changed to
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 0000000..027ee38
--- /dev/null
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,361 @@
+//===-- GlobalMerge.cpp - Internal globals merging  -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//    foo[i] = bar[i] * baz[i];
+// }
+//
+//  On ARM the addresses of 3 arrays should be kept in the registers, thus
+//  this code has quite large register pressure (loop body):
+//
+//  ldr     r1, [r5], #4
+//  ldr     r2, [r6], #4
+//  mul     r1, r2, r1
+//  str     r1, [r0], #4
+//
+//  Pass converts the code to something like:
+//
+//  static struct {
+//    int foo[N];
+//    int bar[N];
+//    int baz[N];
+//  } merged;
+//
+//  for (i = 0; i < N; ++i) {
+//    merged.foo[i] = merged.bar[i] * merged.baz[i];
+//  }
+//
+//  and in ARM code this becomes:
+//
+//  ldr     r0, [r5, #40]
+//  ldr     r1, [r5, #80]
+//  mul     r0, r1, r0
+//  str     r0, [r5], #4
+//
+//  note that we saved 2 registers here almostly "for free".
+// ===---------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+                  cl::desc("Enable global merge pass"),
+                  cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+                         cl::desc("Enable global merge pass on constants"),
+                         cl::init(false));
+
+// FIXME: this could be a transitional option, and we probably need to remove
+// it if only we are sure this optimization could always benefit all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+     cl::desc("Enable global merge pass on external linkage"),
+     cl::init(false));
+
+STATISTIC(NumMerged      , "Number of globals merged");
+namespace {
+  class GlobalMerge : public FunctionPass {
+    const TargetMachine *TM;
+
+    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                 Module &M, bool isConst, unsigned AddrSpace) const;
+
+    /// \brief Check if the given variable has been identified as must keep
+    /// \pre setMustKeepGlobalVariables must have been called on the Module that
+    ///      contains GV
+    bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+      return MustKeepGlobalVariables.count(GV);
+    }
+
+    /// Collect every variables marked as "used" or used in a landing pad
+    /// instruction for this Module.
+    void setMustKeepGlobalVariables(Module &M);
+
+    /// Collect every variables marked as "used"
+    void collectUsedGlobalVariables(Module &M);
+
+    /// Keep track of the GlobalVariable that must not be merged away
+    SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
+    explicit GlobalMerge(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM) {
+      initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+    }
+
+    bool doInitialization(Module &M) override;
+    bool runOnFunction(Function &F) override;
+    bool doFinalization(Module &M) override;
+
+    const char *getPassName() const override {
+      return "Merge internal globals";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+  };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
+                   false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                          Module &M, bool isConst, unsigned AddrSpace) const {
+  const TargetLowering *TLI = TM->getTargetLowering();
+  const DataLayout *DL = TLI->getDataLayout();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these max offsets are different for the users inside Thumb or ARM
+  // functions)
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+  // FIXME: Find better heuristics
+  std::stable_sort(Globals.begin(), Globals.end(),
+                   [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+    Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+    Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+    return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
+  });
+
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+  assert(Globals.size() > 1);
+
+  // FIXME: This simple solution merges globals all together as maximum as
+  // possible. However, with this solution it would be hard to remove dead
+  // global symbols at link-time. An alternative solution could be checking
+  // global symbols references function by function, and make the symbols
+  // being referred in the same function merged and we would probably need
+  // to introduce heuristic algorithm to solve the merge conflict from
+  // different functions.
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
+    size_t j = 0;
+    uint64_t MergedSize = 0;
+    std::vector<Type*> Tys;
+    std::vector<Constant*> Inits;
+
+    bool HasExternal = false;
+    GlobalVariable *TheFirstExternal = 0;
+    for (j = i; j != e; ++j) {
+      Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += DL->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
+      Tys.push_back(Ty);
+      Inits.push_back(Globals[j]->getInitializer());
+
+      if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+        HasExternal = true;
+        TheFirstExternal = Globals[j];
+      }
+    }
+
+    // If merged variables doesn't have external linkage, we needn't to expose
+    // the symbol after merging.
+    GlobalValue::LinkageTypes Linkage = HasExternal
+                                            ? GlobalValue::ExternalLinkage
+                                            : GlobalValue::InternalLinkage;
+
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+    // If merged variables have external linkage, we use symbol name of the
+    // first variable merged as the suffix of global symbol name. This would
+    // be able to avoid the link-time naming conflict for globalm symbols.
+    GlobalVariable *MergedGV = new GlobalVariable(
+        M, MergedTy, isConst, Linkage, MergedInit,
+        HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName()
+                    : "_MergedGlobals",
+        nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+
+    for (size_t k = i; k < j; ++k) {
+      GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+      std::string Name = Globals[k]->getName();
+
+      Constant *Idx[2] = {
+        ConstantInt::get(Int32Ty, 0),
+        ConstantInt::get(Int32Ty, k-i)
+      };
+      Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+      Globals[k]->replaceAllUsesWith(GEP);
+      Globals[k]->eraseFromParent();
+
+      if (Linkage != GlobalValue::InternalLinkage) {
+        // Generate a new alias...
+        auto *PTy = cast<PointerType>(GEP->getType());
+        GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+                            Linkage, Name, GEP, &M);
+      }
+
+      NumMerged++;
+    }
+    i = j;
+  }
+
+  return true;
+}
+
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+  // Extract global variables from llvm.used array
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  if (!GV || !GV->hasInitializer()) return;
+
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (const GlobalVariable *G =
+        dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+      MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+  collectUsedGlobalVariables(M);
+
+  for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+       ++IFn) {
+    for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+         IBB != IEndBB; ++IBB) {
+      // Follow the invoke link to find the landing pad instruction
+      const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+      if (!II) continue;
+
+      const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+      // Look for globals in the clauses of the landing pad instruction
+      for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+           Idx != NumClauses; ++Idx)
+        if (const GlobalVariable *GV =
+            dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+                                     ->stripPointerCasts()))
+          MustKeepGlobalVariables.insert(GV);
+    }
+  }
+}
+
+bool GlobalMerge::doInitialization(Module &M) {
+  if (!EnableGlobalMerge)
+    return false;
+
+  DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+                                                        BSSGlobals;
+  const TargetLowering *TLI = TM->getTargetLowering();
+  const DataLayout *DL = TLI->getDataLayout();
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+  bool Changed = false;
+  setMustKeepGlobalVariables(M);
+
+  // Grab all non-const globals.
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // Merge is safe for "normal" internal or external globals only
+    if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+      continue;
+
+    if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
+        !I->hasInternalLinkage())
+      continue;
+
+    PointerType *PT = dyn_cast<PointerType>(I->getType());
+    assert(PT && "Global variable is not a pointer!");
+
+    unsigned AddressSpace = PT->getAddressSpace();
+
+    // Ignore fancy-aligned globals for now.
+    unsigned Alignment = DL->getPreferredAlignment(I);
+    Type *Ty = I->getType()->getElementType();
+    if (Alignment > DL->getABITypeAlignment(Ty))
+      continue;
+
+    // Ignore all 'special' globals.
+    if (I->getName().startswith("llvm.") ||
+        I->getName().startswith(".llvm."))
+      continue;
+
+    // Ignore all "required" globals:
+    if (isMustKeepGlobalVariable(I))
+      continue;
+
+    if (DL->getTypeAllocSize(Ty) < MaxOffset) {
+      if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
+        BSSGlobals[AddressSpace].push_back(I);
+      else if (I->isConstant())
+        ConstGlobals[AddressSpace].push_back(I);
+      else
+        Globals[AddressSpace].push_back(I);
+    }
+  }
+
+  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+       I = Globals.begin(), E = Globals.end(); I != E; ++I)
+    if (I->second.size() > 1)
+      Changed |= doMerge(I->second, M, false, I->first);
+
+  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+       I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
+    if (I->second.size() > 1)
+      Changed |= doMerge(I->second, M, false, I->first);
+
+  if (EnableGlobalMergeOnConst)
+    for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+         I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+      if (I->second.size() > 1)
+        Changed |= doMerge(I->second, M, true, I->first);
+
+  return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+  return false;
+}
+
+bool GlobalMerge::doFinalization(Module &M) {
+  MustKeepGlobalVariables.clear();
+  return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
+  return new GlobalMerge(TM);
+}
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
new file mode 100644
index 0000000..61ef722
--- /dev/null
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -0,0 +1,301 @@
+//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief An implementation of jump-instruction tables.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jt"
+
+#include "llvm/CodeGen/JumpInstrTables.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+using namespace llvm;
+
+char JumpInstrTables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
+                      "Jump-Instruction Tables", true, true)
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
+INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
+                    "Jump-Instruction Tables", true, true)
+
+STATISTIC(NumJumpTables, "Number of indirect call tables generated");
+STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
+
+ModulePass *llvm::createJumpInstrTablesPass() {
+  // The default implementation uses a single table for all functions.
+  return new JumpInstrTables(JumpTable::Single);
+}
+
+ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
+  return new JumpInstrTables(JTT);
+}
+
+namespace {
+static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
+static const char jump_section_prefix[] = ".jump.instr.table.text.";
+
+// Checks to see if a given CallSite is making an indirect call, including
+// cases where the indirect call is made through a bitcast.
+bool isIndirectCall(CallSite &CS) {
+  if (CS.getCalledFunction())
+    return false;
+
+  // Check the value to see if it is merely a bitcast of a function. In
+  // this case, it will translate to a direct function call in the resulting
+  // assembly, so we won't treat it as an indirect call here.
+  const Value *V = CS.getCalledValue();
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
+  }
+
+  // Otherwise, since we know it's a call, it must be an indirect call
+  return true;
+}
+
+// Replaces Functions and GlobalAliases with a different Value.
+bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
+  User *Us = U->getUser();
+  if (!Us)
+    return false;
+  if (Instruction *I = dyn_cast<Instruction>(Us)) {
+    CallSite CS(I);
+
+    // Don't do the replacement if this use is a direct call to this function.
+    // If the use is not the called value, then replace it.
+    if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
+      return false;
+    }
+
+    U->set(V);
+  } else if (Constant *C = dyn_cast<Constant>(Us)) {
+    // Don't replace calls to bitcasts of function symbols, since they get
+    // translated to direct calls.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
+      if (CE->getOpcode() == Instruction::BitCast) {
+        // This bitcast must have exactly one user.
+        if (CE->user_begin() != CE->user_end()) {
+          User *ParentUs = *CE->user_begin();
+          if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
+            CallSite CS(CI);
+            Use &CEU = *CE->use_begin();
+            if (CS.isCallee(&CEU)) {
+              return false;
+            }
+          }
+        }
+      }
+    }
+
+    // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier
+    // requires alias to point to a defined function. So, GlobalAlias is handled
+    // as a separate case in runOnModule.
+    if (!isa<GlobalAlias>(C))
+      C->replaceUsesOfWithOnConstant(GV, V, U);
+  } else {
+    assert(false && "The Use of a Function symbol is neither an instruction nor"
+                    " a constant");
+  }
+
+  return true;
+}
+
+// Replaces all replaceable address-taken uses of GV with a pointer to a
+// jump-instruction table entry.
+void replaceValueWithFunction(GlobalValue *GV, Function *F) {
+  // Go through all uses of this function and replace the uses of GV with the
+  // jump-table version of the function. Get the uses as a vector before
+  // replacing them, since replacing them changes the use list and invalidates
+  // the iterator otherwise.
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
+    Use &U = *I++;
+
+    // Replacement of constants replaces all instances in the constant. So, some
+    // uses might have already been handled by the time we reach them here.
+    if (U.get() == GV)
+      replaceGlobalValueIndirectUse(GV, F, &U);
+  }
+
+  return;
+}
+} // end anonymous namespace
+
+JumpInstrTables::JumpInstrTables()
+    : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
+      JTType(JumpTable::Single) {
+  initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
+    : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
+  initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::~JumpInstrTables() {}
+
+void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<JumpInstrTableInfo>();
+}
+
+Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
+  FunctionType *OrigFunTy = Target->getFunctionType();
+  FunctionType *FunTy = transformType(OrigFunTy);
+
+  JumpMap::iterator it = Metadata.find(FunTy);
+  if (Metadata.end() == it) {
+    struct TableMeta Meta;
+    Meta.TableNum = TableCount;
+    Meta.Count = 0;
+    Metadata[FunTy] = Meta;
+    it = Metadata.find(FunTy);
+    ++NumJumpTables;
+    ++TableCount;
+  }
+
+  it->second.Count++;
+
+  std::string NewName(jump_func_prefix);
+  NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
+  Function *JumpFun =
+      Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
+  // The section for this table
+  JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
+  JITI->insertEntry(FunTy, Target, JumpFun);
+
+  ++NumFuncsInJumpTables;
+  return JumpFun;
+}
+
+bool JumpInstrTables::hasTable(FunctionType *FunTy) {
+  FunctionType *TransTy = transformType(FunTy);
+  return Metadata.end() != Metadata.find(TransTy);
+}
+
+FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+  // Returning nullptr forces all types into the same table, since all types map
+  // to the same type
+  Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
+
+  // Ignore the return type.
+  Type *RetTy = VoidPtrTy;
+  bool IsVarArg = FunTy->isVarArg();
+  std::vector<Type *> ParamTys(FunTy->getNumParams());
+  FunctionType::param_iterator PI, PE;
+  int i = 0;
+
+  std::vector<Type *> EmptyParams;
+  Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
+  FunctionType *VoidFnTy = FunctionType::get(
+      Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
+  switch (JTType) {
+  case JumpTable::Single:
+
+    return FunctionType::get(RetTy, EmptyParams, false);
+  case JumpTable::Arity:
+    // Transform all types to void* so that all functions with the same arity
+    // end up in the same table.
+    for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+         PI++, i++) {
+      ParamTys[i] = VoidPtrTy;
+    }
+
+    return FunctionType::get(RetTy, ParamTys, IsVarArg);
+  case JumpTable::Simplified:
+    // Project all parameters types to one of 3 types: composite, integer, and
+    // function, matching the three subclasses of Type.
+    for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+         ++PI, ++i) {
+      assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
+              isa<CompositeType>(*PI)) &&
+             "This type is not an Integer or a Composite or a Function");
+      if (isa<CompositeType>(*PI)) {
+        ParamTys[i] = VoidPtrTy;
+      } else if (isa<FunctionType>(*PI)) {
+        ParamTys[i] = VoidFnTy;
+      } else if (isa<IntegerType>(*PI)) {
+        ParamTys[i] = Int32Ty;
+      }
+    }
+
+    return FunctionType::get(RetTy, ParamTys, IsVarArg);
+  case JumpTable::Full:
+    // Don't transform this type at all.
+    return FunTy;
+  }
+
+  return nullptr;
+}
+
+bool JumpInstrTables::runOnModule(Module &M) {
+  // Make sure the module is well-formed, especially with respect to jumptable.
+  if (verifyModule(M))
+    return false;
+
+  JITI = &getAnalysis<JumpInstrTableInfo>();
+
+  // Get the set of jumptable-annotated functions.
+  DenseMap<Function *, Function *> Functions;
+  for (Function &F : M) {
+    if (F.hasFnAttribute(Attribute::JumpTable)) {
+      assert(F.hasUnnamedAddr() &&
+             "Attribute 'jumptable' requires 'unnamed_addr'");
+      Functions[&F] = nullptr;
+    }
+  }
+
+  // Create the jump-table functions.
+  for (auto &KV : Functions) {
+    Function *F = KV.first;
+    KV.second = insertEntry(M, F);
+  }
+
+  // GlobalAlias is a special case, because the target of an alias statement
+  // must be a defined function. So, instead of replacing a given function in
+  // the alias, we replace all uses of aliases that target jumptable functions.
+  // Note that there's no need to create these functions, since only aliases
+  // that target known jumptable functions are replaced, and there's no way to
+  // put the jumptable annotation on a global alias.
+  DenseMap<GlobalAlias *, Function *> Aliases;
+  for (GlobalAlias &GA : M.aliases()) {
+    Constant *Aliasee = GA.getAliasee();
+    if (Function *F = dyn_cast<Function>(Aliasee)) {
+      auto it = Functions.find(F);
+      if (it != Functions.end()) {
+        Aliases[&GA] = it->second;
+      }
+    }
+  }
+
+  // Replace each address taken function with its jump-instruction table entry.
+  for (auto &KV : Functions)
+    replaceValueWithFunction(KV.first, KV.second);
+
+  for (auto &KV : Aliases)
+    replaceValueWithFunction(KV.first, KV.second);
+
+  return !Functions.empty();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a5ac057..df96b94 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,11 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/JumpInstrTables.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -82,6 +86,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
                                           bool DisableVerify,
                                           AnalysisID StartAfter,
                                           AnalysisID StopAfter) {
+
   // Add internal analysis passes from the target machine.
   TM->addAnalysisPasses(PM);
 
@@ -136,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                             bool DisableVerify,
                                             AnalysisID StartAfter,
                                             AnalysisID StopAfter) {
+  // Passes to handle jumptable function annotations. These can't be handled at
+  // JIT time, so we don't add them directly to addPassesToGenerateCode.
+  PM.add(createJumpInstrTableInfoPass());
+  PM.add(createJumpInstrTablesPass(Options.JTType));
+
   // Add common CodeGen passes.
   MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
                                                StartAfter, StopAfter);
@@ -199,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   case CGFT_Null:
     // The Null output is intended for use for performance analysis and testing,
     // not real users.
-    AsmStreamer.reset(createNullStreamer(*Context));
+    AsmStreamer.reset(getTarget().createNullStreamer(*Context));
     break;
   }
 
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 388f58f..7d5646b 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -329,12 +329,13 @@ class LDVImpl {
   void computeIntervals();
 
 public:
-  LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
-                                    ModifiedMF(false) {}
+  LDVImpl(LiveDebugVariables *ps)
+      : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
   bool runOnMachineFunction(MachineFunction &mf);
 
   /// clear - Release all memory.
   void clear() {
+    MF = nullptr;
     userValues.clear();
     virtRegToEqClass.clear();
     userVarMap.clear();
@@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() {
 }
 
 bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+  clear();
   MF = &mf;
   LIS = &pass.getAnalysis<LiveIntervals>();
   MDT = &pass.getAnalysis<MachineDominatorTree>();
   TRI = mf.getTarget().getRegisterInfo();
-  clear();
   LS.initialize(mf);
   DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
                << mf.getName() << " **********\n");
@@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
 bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
   if (!EnableLDV)
     return false;
+  if (!FunctionDIs.count(mf.getFunction()))
+    return false;
   if (!pImpl)
     pImpl = new LDVImpl(this);
   return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
 
 void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
   DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+  if (!MF)
+    return;
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
   for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
     DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
@@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
     static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
 }
 
+bool LiveDebugVariables::doInitialization(Module &M) {
+  FunctionDIs = makeSubprogramMap(M);
+  return Pass::doInitialization(M);
+}
 
 #ifndef NDEBUG
 void LiveDebugVariables::dump() {
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index bb67435..7ec0d17 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -22,6 +22,7 @@
 #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
@@ -32,6 +33,7 @@ class VirtRegMap;
 
 class LiveDebugVariables : public MachineFunctionPass {
   void *pImpl;
+  DenseMap<const Function*, DISubprogram> FunctionDIs;
 public:
   static char ID; // Pass identification, replacement for typeid
 
@@ -64,6 +66,7 @@ private:
   bool runOnMachineFunction(MachineFunction &) override;
   void releaseMemory() override;
   void getAnalysisUsage(AnalysisUsage &) const override;
+  bool doInitialization(Module &) override;
 
 };
 
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3563f8e..1559560 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -186,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
   LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
   LRCalc->createDeadDefs(LI);
   LRCalc->extendToUses(LI);
+  computeDeadValues(&LI, LI, nullptr, nullptr);
 }
 
 void LiveIntervals::computeVirtRegs() {
@@ -412,21 +413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
 
   // Handle dead values.
   bool CanSeparate = false;
+  computeDeadValues(li, NewLR, &CanSeparate, dead);
+
+  // Move the trimmed segments back.
+  li->segments.swap(NewLR.segments);
+  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+  return CanSeparate;
+}
+
+void LiveIntervals::computeDeadValues(LiveInterval *li,
+                                      LiveRange &LR,
+                                      bool *CanSeparate,
+                                      SmallVectorImpl<MachineInstr*> *dead) {
   for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
        I != E; ++I) {
     VNInfo *VNI = *I;
     if (VNI->isUnused())
       continue;
-    LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def);
-    assert(LRI != NewLR.end() && "Missing segment for PHI");
+    LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def);
+    assert(LRI != LR.end() && "Missing segment for PHI");
     if (LRI->end != VNI->def.getDeadSlot())
       continue;
     if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
       VNI->markUnused();
-      NewLR.removeSegment(LRI->start, LRI->end);
+      LR.removeSegment(LRI->start, LRI->end);
       DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
-      CanSeparate = true;
+      if (CanSeparate)
+        *CanSeparate = true;
     } else {
       // This is a dead def. Make sure the instruction knows.
       MachineInstr *MI = getInstructionFromIndex(VNI->def);
@@ -438,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
       }
     }
   }
-
-  // Move the trimmed segments back.
-  li->segments.swap(NewLR.segments);
-  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
-  return CanSeparate;
 }
 
 void LiveIntervals::extendToIndices(LiveRange &LR,
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 0ec5c33..08fef5f 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -332,7 +332,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
   }
 }
 
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
   OS << "BB#" << getNumber();
 }
 
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index eb3d71f..6138aef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
 /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
 /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
 /// normal 'L' label is returned.
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, 
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
                                         bool isLinkerPrivate) const {
   const DataLayout *DL = getTarget().getDataLayout();
   assert(JumpTableInfo && "No jump tables");
@@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
 ///
 int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
                                              unsigned Alignment) {
-  Alignment =
-    clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
-                          !RealignOption,
-                        Alignment, getFrameLowering()->getStackAlignment()); 
+  Alignment = clampStackAlignment(
+      !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+      getFrameLowering()->getStackAlignment());
   CreateStackObject(Size, Alignment, true);
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   ensureMaxAlignment(Alignment);
@@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
 int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
                                                 const AllocaInst *Alloca) {
   HasVarSizedObjects = true;
-  Alignment =
-    clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
-                          !RealignOption,
-                        Alignment, getFrameLowering()->getStackAlignment()); 
+  Alignment = clampStackAlignment(
+      !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+      getFrameLowering()->getStackAlignment());
   Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
   ensureMaxAlignment(Alignment);
   return (int)Objects.size()-NumFixedObjects-1;
@@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   // object is 16-byte aligned.
   unsigned StackAlign = getFrameLowering()->getStackAlignment();
   unsigned Align = MinAlign(SPOffset, StackAlign);
-  Align =
-    clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
-                          !RealignOption,
-                        Align, getFrameLowering()->getStackAlignment()); 
+  Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+                                  !RealignOption,
+                              Align, getFrameLowering()->getStackAlignment());
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
                                               /*isSS*/   false,
                                               /*Alloca*/ nullptr));
   return -++NumFixedObjects;
 }
 
+/// CreateFixedSpillStackObject - Create a spill slot at a fixed location
+/// on the stack.  Returns an index with a negative value.
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+                                                  int64_t SPOffset) {
+  unsigned StackAlign = getFrameLowering()->getStackAlignment();
+  unsigned Align = MinAlign(SPOffset, StackAlign);
+  Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+                                  !RealignOption,
+                              Align, getFrameLowering()->getStackAlignment());
+  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
+                                              /*Immutable*/ true,
+                                              /*isSS*/ true,
+                                              /*Alloca*/ nullptr));
+  return -++NumFixedObjects;
+}
 
 BitVector
 MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
@@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
   if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
       isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
     return false;
-  
+
   // For now, only support constants with the same size.
   uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
-  if (StoreSize != TD->getTypeStoreSize(B->getType()) || 
-      StoreSize > 128)
+  if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128)
     return false;
 
   Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
@@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
 /// an existing one.  User must specify the log2 of the minimum required
 /// alignment for the object.
 ///
-unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, 
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
                                                    unsigned Alignment) {
   assert(Alignment && "Alignment must be specified!");
   if (Alignment > PoolAlignment) PoolAlignment = Alignment;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 23847d6..44191f7 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -333,6 +333,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   if (skipOptnoneFunction(*mf.getFunction()))
     return false;
 
+  const TargetSubtargetInfo &ST =
+    mf.getTarget().getSubtarget<TargetSubtargetInfo>();
+  if (!ST.enablePostMachineScheduler()) {
+    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+    return false;
+  }
   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 
   // Initialize the context of the pass.
@@ -472,14 +478,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
   // unimplemented
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
 void ReadyQueue::dump() {
   dbgs() << Name << ": ";
   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
     dbgs() << Queue[i]->NodeNum << " ";
   dbgs() << "\n";
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -529,6 +534,11 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
     llvm_unreachable(nullptr);
   }
 #endif
+  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
+  // CurrCycle may have advanced since then.
+  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
+    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
+
   --SuccSU->NumPredsLeft;
   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
     SchedImpl->releaseTopNode(SuccSU);
@@ -563,6 +573,11 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
     llvm_unreachable(nullptr);
   }
 #endif
+  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
+  // CurrCycle may have advanced since then.
+  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
+    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
+
   --PredSU->NumSuccsLeft;
   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
     SchedImpl->releaseBottomNode(PredSU);
@@ -674,10 +689,13 @@ void ScheduleDAGMI::schedule() {
         CurrentBottom = MI;
       }
     }
-    updateQueues(SU, IsTopNode);
-
-    // Notify the scheduling strategy after updating the DAG.
+    // Notify the scheduling strategy before updating the DAG.
+    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
+    // runs, it can then use the accurate ReadyCycle time to determine whether
+    // newly released nodes can move to the readyQ.
     SchedImpl->schedNode(SU, IsTopNode);
+
+    updateQueues(SU, IsTopNode);
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 
@@ -1568,7 +1586,7 @@ void SchedBoundary::reset() {
   // Track the maximum number of stall cycles that could arise either from the
   // latency of a DAG edge or the number of cycles that a processor resource is
   // reserved (SchedBoundary::ReservedCycles).
-  MaxObservedLatency = 0;
+  MaxObservedStall = 0;
 #endif
   // Reserve a zero-count for invalid CritResIdx.
   ExecutedResCounts.resize(1);
@@ -1668,8 +1686,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
     for (TargetSchedModel::ProcResIter
            PI = SchedModel->getWriteProcResBegin(SC),
            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+      unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+      if (NRCycle > CurrCycle) {
+#ifndef NDEBUG
+        MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+#endif
+        DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
+              << SchedModel->getResourceName(PI->ProcResourceIdx)
+              << "=" << NRCycle << "c\n");
         return true;
+      }
     }
   }
   return false;
@@ -1725,6 +1751,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
 }
 
 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+#ifndef NDEBUG
+  // ReadyCycle was been bumped up to the CurrCycle when this node was
+  // scheduled, but CurrCycle may have been eagerly advanced immediately after
+  // scheduling, so may now be greater than ReadyCycle.
+  if (ReadyCycle > CurrCycle)
+    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
+
   if (ReadyCycle < MinReadyCycle)
     MinReadyCycle = ReadyCycle;
 
@@ -1744,18 +1780,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) {
   if (SU->isScheduled)
     return;
 
-  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    if (I->isWeak())
-      continue;
-    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
-    unsigned Latency = I->getLatency();
-#ifndef NDEBUG
-    MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
-    if (SU->TopReadyCycle < PredReadyCycle + Latency)
-      SU->TopReadyCycle = PredReadyCycle + Latency;
-  }
   releaseNode(SU, SU->TopReadyCycle);
 }
 
@@ -1763,20 +1787,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) {
   if (SU->isScheduled)
     return;
 
-  assert(SU->getInstr() && "Scheduled SUnit must have instr");
-
-  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    if (I->isWeak())
-      continue;
-    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
-    unsigned Latency = I->getLatency();
-#ifndef NDEBUG
-    MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
-    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
-      SU->BotReadyCycle = SuccReadyCycle + Latency;
-  }
   releaseNode(SU, SU->BotReadyCycle);
 }
 
@@ -1943,10 +1953,12 @@ void SchedBoundary::bumpNode(SUnit *SU) {
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
-          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
-#ifndef NDEBUG
-          MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
-#endif
+          if (isTop()) {
+            ReservedCycles[PIdx] =
+              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+          }
+          else
+            ReservedCycles[PIdx] = NextCycle;
         }
       }
     }
@@ -2049,8 +2061,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
     }
   }
   for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
-           "permanent hazard"); (void)i;
+//  FIXME: Re-enable assert once PR20057 is resolved.
+//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+//           "permanent hazard");
+    (void)i;
     bumpCycle(CurrCycle + 1);
     releasePending();
   }
@@ -2090,111 +2104,6 @@ void SchedBoundary::dumpScheduledState() {
 // GenericScheduler - Generic implementation of MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
-namespace {
-/// Base class for GenericScheduler. This class maintains information about
-/// scheduling candidates based on TargetSchedModel making it easy to implement
-/// heuristics for either preRA or postRA scheduling.
-class GenericSchedulerBase : public MachineSchedStrategy {
-public:
-  /// Represent the type of SchedCandidate found within a single queue.
-  /// pickNodeBidirectional depends on these listed by decreasing priority.
-  enum CandReason {
-    NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
-    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
-    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
-
-#ifndef NDEBUG
-  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
-#endif
-
-  /// Policy for scheduling the next instruction in the candidate's zone.
-  struct CandPolicy {
-    bool ReduceLatency;
-    unsigned ReduceResIdx;
-    unsigned DemandResIdx;
-
-    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
-  };
-
-  /// Status of an instruction's critical resource consumption.
-  struct SchedResourceDelta {
-    // Count critical resources in the scheduled region required by SU.
-    unsigned CritResources;
-
-    // Count critical resources from another region consumed by SU.
-    unsigned DemandedResources;
-
-    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
-
-    bool operator==(const SchedResourceDelta &RHS) const {
-      return CritResources == RHS.CritResources
-        && DemandedResources == RHS.DemandedResources;
-    }
-    bool operator!=(const SchedResourceDelta &RHS) const {
-      return !operator==(RHS);
-    }
-  };
-
-  /// Store the state used by GenericScheduler heuristics, required for the
-  /// lifetime of one invocation of pickNode().
-  struct SchedCandidate {
-    CandPolicy Policy;
-
-    // The best SUnit candidate.
-    SUnit *SU;
-
-    // The reason for this candidate.
-    CandReason Reason;
-
-    // Set of reasons that apply to multiple candidates.
-    uint32_t RepeatReasonSet;
-
-    // Register pressure values for the best candidate.
-    RegPressureDelta RPDelta;
-
-    // Critical resource consumption of the best candidate.
-    SchedResourceDelta ResDelta;
-
-    SchedCandidate(const CandPolicy &policy)
-      : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
-
-    bool isValid() const { return SU; }
-
-    // Copy the status of another candidate without changing policy.
-    void setBest(SchedCandidate &Best) {
-      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
-      SU = Best.SU;
-      Reason = Best.Reason;
-      RPDelta = Best.RPDelta;
-      ResDelta = Best.ResDelta;
-    }
-
-    bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
-    void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
-
-    void initResourceDelta(const ScheduleDAGMI *DAG,
-                           const TargetSchedModel *SchedModel);
-  };
-
-protected:
-  const MachineSchedContext *Context;
-  const TargetSchedModel *SchedModel;
-  const TargetRegisterInfo *TRI;
-
-  SchedRemainder Rem;
-protected:
-  GenericSchedulerBase(const MachineSchedContext *C):
-    Context(C), SchedModel(nullptr), TRI(nullptr) {}
-
-  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
-                 SchedBoundary *OtherZone);
-
-#ifndef NDEBUG
-  void traceCandidate(const SchedCandidate &Cand);
-#endif
-};
-} // namespace
-
 void GenericSchedulerBase::SchedCandidate::
 initResourceDelta(const ScheduleDAGMI *DAG,
                   const TargetSchedModel *SchedModel) {
@@ -2430,65 +2339,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
         << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
 }
 
-namespace {
-/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class GenericScheduler : public GenericSchedulerBase {
-  ScheduleDAGMILive *DAG;
-
-  // State of the top and bottom scheduled instruction boundaries.
-  SchedBoundary Top;
-  SchedBoundary Bot;
-
-  MachineSchedPolicy RegionPolicy;
-public:
-  GenericScheduler(const MachineSchedContext *C):
-    GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
-    Bot(SchedBoundary::BotQID, "BotQ") {}
-
-  void initPolicy(MachineBasicBlock::iterator Begin,
-                  MachineBasicBlock::iterator End,
-                  unsigned NumRegionInstrs) override;
-
-  bool shouldTrackPressure() const override {
-    return RegionPolicy.ShouldTrackPressure;
-  }
-
-  void initialize(ScheduleDAGMI *dag) override;
-
-  SUnit *pickNode(bool &IsTopNode) override;
-
-  void schedNode(SUnit *SU, bool IsTopNode) override;
-
-  void releaseTopNode(SUnit *SU) override {
-    Top.releaseTopNode(SU);
-  }
-
-  void releaseBottomNode(SUnit *SU) override {
-    Bot.releaseBottomNode(SU);
-  }
-
-  void registerRoots() override;
-
-protected:
-  void checkAcyclicLatency();
-
-  void tryCandidate(SchedCandidate &Cand,
-                    SchedCandidate &TryCand,
-                    SchedBoundary &Zone,
-                    const RegPressureTracker &RPTracker,
-                    RegPressureTracker &TempTracker);
-
-  SUnit *pickNodeBidirectional(bool &IsTopNode);
-
-  void pickNodeFromQueue(SchedBoundary &Zone,
-                         const RegPressureTracker &RPTracker,
-                         SchedCandidate &Candidate);
-
-  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
-};
-} // namespace
-
 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
   assert(dag->hasVRegLiveness() &&
          "(PreRA)GenericScheduler needs vreg liveness");
@@ -3023,75 +2873,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.",
 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
-namespace {
-/// PostGenericScheduler - Interface to the scheduling algorithm used by
-/// ScheduleDAGMI.
-///
-/// Callbacks from ScheduleDAGMI:
-///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
-class PostGenericScheduler : public GenericSchedulerBase {
-  ScheduleDAGMI *DAG;
-  SchedBoundary Top;
-  SmallVector<SUnit*, 8> BotRoots;
-public:
-  PostGenericScheduler(const MachineSchedContext *C):
-    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
-
-  virtual ~PostGenericScheduler() {}
-
-  void initPolicy(MachineBasicBlock::iterator Begin,
-                  MachineBasicBlock::iterator End,
-                  unsigned NumRegionInstrs) override {
-    /* no configurable policy */
-  };
-
-  /// PostRA scheduling does not track pressure.
-  bool shouldTrackPressure() const override { return false; }
-
-  void initialize(ScheduleDAGMI *Dag) override {
-    DAG = Dag;
-    SchedModel = DAG->getSchedModel();
-    TRI = DAG->TRI;
-
-    Rem.init(DAG, SchedModel);
-    Top.init(DAG, SchedModel, &Rem);
-    BotRoots.clear();
-
-    // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
-    // or are disabled, then these HazardRecs will be disabled.
-    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
-    const TargetMachine &TM = DAG->MF.getTarget();
-    if (!Top.HazardRec) {
-      Top.HazardRec =
-        TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
-    }
-  }
-
-  void registerRoots() override;
-
-  SUnit *pickNode(bool &IsTopNode) override;
-
-  void scheduleTree(unsigned SubtreeID) override {
-    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
-  }
-
-  void schedNode(SUnit *SU, bool IsTopNode) override;
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+  DAG = Dag;
+  SchedModel = DAG->getSchedModel();
+  TRI = DAG->TRI;
 
-  void releaseTopNode(SUnit *SU) override {
-    Top.releaseTopNode(SU);
-  }
+  Rem.init(DAG, SchedModel);
+  Top.init(DAG, SchedModel, &Rem);
+  BotRoots.clear();
 
-  // Only called for roots.
-  void releaseBottomNode(SUnit *SU) override {
-    BotRoots.push_back(SU);
+  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+  // or are disabled, then these HazardRecs will be disabled.
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+  const TargetMachine &TM = DAG->MF.getTarget();
+  if (!Top.HazardRec) {
+    Top.HazardRec =
+      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   }
+}
 
-protected:
-  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
-
-  void pickNodeFromQueue(SchedCandidate &Cand);
-};
-} // namespace
 
 void PostGenericScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index b3f7198..249b2d0 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -30,11 +30,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-extern cl::opt<bool> EnableStackMapLiveness;
-extern cl::opt<bool> EnablePatchPointLiveness;
-}
-
 static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
     cl::desc("Disable Post Regalloc"));
 static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -92,9 +87,9 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
 
 // Temporary option to allow experimenting with MachineScheduler as a post-RA
 // scheduler. Targets can "properly" enable this with
-// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't
-// be part of the standard pass pipeline, and the target would just add a PostRA
-// scheduling pass wherever it wants.
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it
+// wouldn't be part of the standard pass pipeline, and the target would just add
+// a PostRA scheduling pass wherever it wants.
 static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
   cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
 
@@ -421,7 +416,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
     // FALLTHROUGH
   case ExceptionHandling::DwarfCFI:
   case ExceptionHandling::ARM:
-  case ExceptionHandling::Win64:
+  case ExceptionHandling::WinEH:
     addPass(createDwarfEHPass(TM));
     break;
   case ExceptionHandling::None:
@@ -566,8 +561,7 @@ void TargetPassConfig::addMachinePasses() {
   if (addPreEmitPass())
     printAndVerify("After PreEmit passes");
 
-  if (EnableStackMapLiveness || EnablePatchPointLiveness)
-    addPass(&StackMapLivenessID);
+  addPass(&StackMapLivenessID);
 }
 
 /// Add passes that optimize machine instructions in SSA form.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index eeee93a..716cb1f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -91,6 +91,10 @@ static cl::opt<bool>
 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                 cl::desc("Disable the peephole optimizer"));
 
+static cl::opt<bool>
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
+                  cl::desc("Disable advanced copy optimization"));
+
 STATISTIC(NumReuse,      "Number of extension results reused");
 STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate folded");
@@ -137,6 +141,105 @@ namespace {
     bool isLoadFoldable(MachineInstr *MI,
                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
   };
+
+  /// \brief Helper class to track the possible sources of a value defined by
+  /// a (chain of) copy related instructions.
+  /// Given a definition (instruction and definition index), this class
+  /// follows the use-def chain to find successive suitable sources.
+  /// The given source can be used to rewrite the definition into
+  /// def = COPY src.
+  ///
+  /// For instance, let us consider the following snippet:
+  /// v0 =
+  /// v2 = INSERT_SUBREG v1, v0, sub0
+  /// def = COPY v2.sub0
+  ///
+  /// Using a ValueTracker for def = COPY v2.sub0 will give the following
+  /// suitable sources:
+  /// v2.sub0 and v0.
+  /// Then, def can be rewritten into def = COPY v0.
+  class ValueTracker {
+  private:
+    /// The current point into the use-def chain.
+    const MachineInstr *Def;
+    /// The index of the definition in Def.
+    unsigned DefIdx;
+    /// The sub register index of the definition.
+    unsigned DefSubReg;
+    /// The register where the value can be found.
+    unsigned Reg;
+    /// Specifiy whether or not the value tracking looks through
+    /// complex instructions. When this is false, the value tracker
+    /// bails on everything that is not a copy or a bitcast.
+    ///
+    /// Note: This could have been implemented as a specialized version of
+    /// the ValueTracker class but that would have complicated the code of
+    /// the users of this class.
+    bool UseAdvancedTracking;
+    /// Optional MachineRegisterInfo used to perform some complex
+    /// tracking.
+    const MachineRegisterInfo *MRI;
+
+    /// \brief Dispatcher to the right underlying implementation of
+    /// getNextSource.
+    bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for Copy instructions.
+    bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for Bitcast instructions.
+    bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for RegSequence
+    /// instructions.
+    bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for InsertSubreg
+    /// instructions.
+    bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for ExtractSubreg
+    /// instructions.
+    bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+    /// \brief Specialized version of getNextSource for SubregToReg
+    /// instructions.
+    bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+  public:
+    /// \brief Create a ValueTracker instance for the value defines by \p MI
+    /// at the operand index \p DefIdx.
+    /// \p DefSubReg represents the sub register index the value tracker will
+    /// track. It does not need to match the sub register index used in \p MI.
+    /// \p UseAdvancedTracking specifies whether or not the value tracker looks
+    /// through complex instructions. By default (false), it handles only copy
+    /// and bitcast instructions.
+    /// \p MRI useful to perform some complex checks.
+    ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+                 bool UseAdvancedTracking = false,
+                 const MachineRegisterInfo *MRI = nullptr)
+        : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
+          UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
+      assert(Def->getOperand(DefIdx).isDef() &&
+             Def->getOperand(DefIdx).isReg() &&
+             "Definition does not match machine instruction");
+      // Initially the value is in the defined register.
+      Reg = Def->getOperand(DefIdx).getReg();
+    }
+
+    /// \brief Following the use-def chain, get the next available source
+    /// for the tracked value.
+    /// When the returned value is not nullptr, getReg() gives the register
+    /// that contain the tracked value.
+    /// \note The sub register index returned in \p SrcSubReg must be used
+    /// on that getReg() to access the actual value.
+    /// \return Unless the returned value is nullptr (i.e., no source found),
+    /// \p SrcIdx gives the index of the next source in the returned
+    /// instruction and \p SrcSubReg the index to be used on that source to
+    /// get the tracked value. When nullptr is returned, no alternative source
+    /// has been found.
+    const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+    /// \brief Get the last register where the initial value can be found.
+    /// Initially this is the register of the definition.
+    /// Then, after each successful call to getNextSource, this is the
+    /// register of the last source.
+    unsigned getReg() const { return Reg; }
+  };
 }
 
 char PeepholeOptimizer::ID = 0;
@@ -443,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
   unsigned Src;
   unsigned SrcSubReg;
   bool ShouldRewrite = false;
-  MachineInstr *Copy = MI;
   const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
 
-  // Follow the chain of copies until we reach the top or find a
-  // more suitable source.
+  // Follow the chain of copies until we reach the top of the use-def chain
+  // or find a more suitable source.
+  ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
   do {
-    unsigned CopyDefIdx, CopySrcIdx;
-    if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+    unsigned CopySrcIdx, CopySrcSubReg;
+    if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
       break;
-    const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
-    assert(MO.isReg() && "Copies must be between registers.");
-    Src = MO.getReg();
-
+    Src = ValTracker.getReg();
+    SrcSubReg = CopySrcSubReg;
+
+    // Do not extend the live-ranges of physical registers as they add
+    // constraints to the register allocator.
+    // Moreover, if we want to extend the live-range of a physical register,
+    // unlike SSA virtual register, we will have to check that they are not
+    // redefine before the related use.
     if (TargetRegisterInfo::isPhysicalRegister(Src))
       break;
 
     const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
-    SrcSubReg = MO.getSubReg();
 
     // If this source does not incur a cross register bank copy, use it.
     ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
                                           SrcSubReg);
-    // Follow the chain of copies: get the definition of Src.
-    Copy = MRI->getVRegDef(Src);
-  } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+  } while (!ShouldRewrite);
 
   // If we did not find a more suitable source, there is nothing to optimize.
   if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
@@ -483,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
 
   MRI->replaceRegWith(Def, NewVR);
   MRI->clearKillFlags(NewVR);
+  // We extended the lifetime of Src.
+  // Clear the kill flags to account for that.
+  MRI->clearKillFlags(Src);
   MI->eraseFromParent();
   ++NumCopiesBitcasts;
   return true;
@@ -673,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+                                         unsigned &SrcSubReg) {
+  assert(Def->isCopy() && "Invalid definition");
+  // Copy instruction are supposed to be: Def = Src.
+  // If someone breaks this assumption, bad things will happen everywhere.
+  assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+
+  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+    // If we look for a different subreg, it means we want a subreg of src.
+    // Bails as we do not support composing subreg yet.
+    return false;
+  // Otherwise, we want the whole source.
+  SrcIdx = 1;
+  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+                                            unsigned &SrcSubReg) {
+  assert(Def->isBitcast() && "Invalid definition");
+
+  // Bail if there are effects that a plain copy will not expose.
+  if (Def->hasUnmodeledSideEffects())
+    return false;
+
+  // Bitcasts with more than one def are not supported.
+  if (Def->getDesc().getNumDefs() != 1)
+    return false;
+  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+    // If we look for a different subreg, it means we want a subreg of the src.
+    // Bails as we do not support composing subreg yet.
+    return false;
+
+  SrcIdx = Def->getDesc().getNumOperands();
+  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+       ++OpIdx) {
+    const MachineOperand &MO = Def->getOperand(OpIdx);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    assert(!MO.isDef() && "We should have skipped all the definitions by now");
+    if (SrcIdx != EndOpIdx)
+      // Multiple sources?
+      return false;
+    SrcIdx = OpIdx;
+  }
+  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  assert(Def->isRegSequence() && "Invalid definition");
+
+  if (Def->getOperand(DefIdx).getSubReg())
+    // If we are composing subreg, bails out.
+    // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+    // This should almost never happen as the SSA property is tracked at
+    // the register level (as opposed to the subreg level).
+    // I.e.,
+    // Def.sub0 =
+    // Def.sub1 =
+    // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+    // Def. Thus, it must not be generated.
+    // However, some code could theoretically generates a single
+    // Def.sub0 (i.e, not defining the other subregs) and we would
+    // have this case.
+    // If we can ascertain (or force) that this never happens, we could
+    // turn that into an assertion.
+    return false;
+
+  // We are looking at:
+  // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+  // Check if one of the operand defines the subreg we are interested in.
+  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
+       OpIdx != EndOpIdx; OpIdx += 2) {
+    const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
+    assert(MOSubIdx.isImm() &&
+           "One of the subindex of the reg_sequence is not an immediate");
+    if (MOSubIdx.getImm() == DefSubReg) {
+      assert(Def->getOperand(OpIdx).isReg() &&
+             "One of the source of the reg_sequence is not a register");
+      SrcIdx = OpIdx;
+      SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+      return true;
+    }
+  }
+
+  // If the subreg we are tracking is super-defined by another subreg,
+  // we could follow this value. However, this would require to compose
+  // the subreg and we do not do that for now.
+  return false;
+}
+
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+                                                 unsigned &SrcSubReg) {
+  assert(Def->isInsertSubreg() && "Invalid definition");
+  if (Def->getOperand(DefIdx).getSubReg())
+    // If we are composing subreg, bails out.
+    // Same remark as getNextSourceFromRegSequence.
+    // I.e., this may be turned into an assert.
+    return false;
+
+  // We are looking at:
+  // Def = INSERT_SUBREG v0, v1, sub1
+  // There are two cases:
+  // 1. DefSubReg == sub1, get v1.
+  // 2. DefSubReg != sub1, the value may be available through v0.
+
+  // #1 Check if the inserted register matches the require sub index.
+  unsigned InsertedSubReg = Def->getOperand(3).getImm();
+  if (InsertedSubReg == DefSubReg) {
+    SrcIdx = 2;
+    SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+    return true;
+  }
+  // #2 Otherwise, if the sub register we are looking for is not partial
+  // defined by the inserted element, we can look through the main
+  // register (v0).
+  // To check the overlapping we need a MRI and a TRI.
+  if (!MRI)
+    return false;
+
+  const MachineOperand &MODef = Def->getOperand(DefIdx);
+  const MachineOperand &MOBase = Def->getOperand(1);
+  // If the result register (Def) and the base register (v0) do not
+  // have the same register class or if we have to compose
+  // subregisters, bails out.
+  if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
+      MOBase.getSubReg())
+    return false;
+
+  // Get the TRI and check if inserted sub register overlaps with the
+  // sub register we are tracking.
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  if (!TRI ||
+      (TRI->getSubRegIndexLaneMask(DefSubReg) &
+       TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+    return false;
+  // At this point, the value is available in v0 via the same subreg
+  // we used for Def.
+  SrcIdx = 1;
+  SrcSubReg = DefSubReg;
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+                                                  unsigned &SrcSubReg) {
+  assert(Def->isExtractSubreg() && "Invalid definition");
+  // We are looking at:
+  // Def = EXTRACT_SUBREG v0, sub0
+
+  // Bails if we have to compose sub registers.
+  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+  if (DefSubReg)
+    return false;
+
+  // Bails if we have to compose sub registers.
+  // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+  if (Def->getOperand(1).getSubReg())
+    return false;
+  // Otherwise, the value is available in the v0.sub0.
+  SrcIdx = 1;
+  SrcSubReg = Def->getOperand(2).getImm();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  assert(Def->isSubregToReg() && "Invalid definition");
+  // We are looking at:
+  // Def = SUBREG_TO_REG Imm, v0, sub0
+
+  // Bails if we have to compose sub registers.
+  // If DefSubReg != sub0, we would have to check that all the bits
+  // we track are included in sub0 and if yes, we would have to
+  // determine the right subreg in v0.
+  if (DefSubReg != Def->getOperand(3).getImm())
+    return false;
+  // Bails if we have to compose sub registers.
+  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+  if (Def->getOperand(2).getSubReg())
+    return false;
+
+  SrcIdx = 2;
+  SrcSubReg = Def->getOperand(3).getImm();
+  return true;
+}
+
+bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+  assert(Def && "This method needs a valid definition");
+
+  assert(
+      (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
+      Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+  if (Def->isCopy())
+    return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+  if (Def->isBitcast())
+    return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+  // All the remaining cases involve "complex" instructions.
+  // Bails if we did not ask for the advanced tracking.
+  if (!UseAdvancedTracking)
+    return false;
+  if (Def->isRegSequence())
+    return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
+  if (Def->isInsertSubreg())
+    return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
+  if (Def->isExtractSubreg())
+    return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+  if (Def->isSubregToReg())
+    return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+  return false;
+}
+
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  // If we reach a point where we cannot move up in the use-def chain,
+  // there is nothing we can get.
+  if (!Def)
+    return nullptr;
+
+  const MachineInstr *PrevDef = nullptr;
+  // Try to find the next source.
+  if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+    // Update definition, definition index, and subregister for the
+    // next call of getNextSource.
+    const MachineOperand &MO = Def->getOperand(SrcIdx);
+    assert(MO.isReg() && !MO.isDef() && "Source is invalid");
+    // Update the current register.
+    Reg = MO.getReg();
+    // Update the return value before moving up in the use-def chain.
+    PrevDef = Def;
+    // If we can still move up in the use-def chain, move to the next
+    // defintion.
+    if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      Def = MRI->getVRegDef(Reg);
+      DefIdx = MRI->def_begin(Reg).getOperandNo();
+      DefSubReg = SrcSubReg;
+      return PrevDef;
+    }
+  }
+  // If we end up here, this means we will not be able to find another source
+  // for the next iteration.
+  // Make sure any new call to getNextSource bails out early by cutting the
+  // use-def chain.
+  Def = nullptr;
+  return PrevDef;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index c74a42f..b98d210 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -160,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   replaceFrameIndices(Fn);
 
   // If register scavenging is needed, as we've enabled doing it as a
-  // post-pass, scavenge the virtual registers that frame index elimiation
+  // post-pass, scavenge the virtual registers that frame index elimination
   // inserted.
   if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
     scavengeFrameVirtualRegs(Fn);
@@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
     }
   }
 
-  if (CSI.empty())
-    return;   // Early exit if no callee saved registers are modified!
+  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+    // If target doesn't implement this, use generic code.
 
-  unsigned NumFixedSpillSlots;
-  const TargetFrameLowering::SpillSlot *FixedSpillSlots =
-    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+    if (CSI.empty())
+      return; // Early exit if no callee saved registers are modified!
 
-  // Now that we know which registers need to be saved and restored, allocate
-  // stack slots for them.
-  for (std::vector<CalleeSavedInfo>::iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
-    unsigned Reg = I->getReg();
-    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+    unsigned NumFixedSpillSlots;
+    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
 
-    int FrameIdx;
-    if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
-      I->setFrameIdx(FrameIdx);
-      continue;
-    }
+    // Now that we know which registers need to be saved and restored, allocate
+    // stack slots for them.
+    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+         I != E; ++I) {
+      unsigned Reg = I->getReg();
+      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
 
-    // Check to see if this physreg must be spilled to a particular stack slot
-    // on this target.
-    const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
-    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
-           FixedSlot->Reg != Reg)
-      ++FixedSlot;
-
-    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
-      // Nope, just spill it anywhere convenient.
-      unsigned Align = RC->getAlignment();
-      unsigned StackAlign = TFI->getStackAlignment();
-
-      // We may not be able to satisfy the desired alignment specification of
-      // the TargetRegisterClass if the stack alignment is smaller. Use the
-      // min.
-      Align = std::min(Align, StackAlign);
-      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
-      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
-      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
-    } else {
-      // Spill it to the stack where we must.
-      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
-    }
+      int FrameIdx;
+      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+        I->setFrameIdx(FrameIdx);
+        continue;
+      }
+
+      // Check to see if this physreg must be spilled to a particular stack slot
+      // on this target.
+      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+             FixedSlot->Reg != Reg)
+        ++FixedSlot;
+
+      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+        // Nope, just spill it anywhere convenient.
+        unsigned Align = RC->getAlignment();
+        unsigned StackAlign = TFI->getStackAlignment();
+
+        // We may not be able to satisfy the desired alignment specification of
+        // the TargetRegisterClass if the stack alignment is smaller. Use the
+        // min.
+        Align = std::min(Align, StackAlign);
+        FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+        if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+        if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+      } else {
+        // Spill it to the stack where we must.
+        FrameIdx =
+            MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+      }
 
-    I->setFrameIdx(FrameIdx);
+      I->setFrameIdx(FrameIdx);
+    }
   }
 
   MFI->setCalleeSavedInfo(CSI);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index aa7c178..901b993 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <queue>
 
 using namespace llvm;
@@ -79,6 +80,12 @@ ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
                  cl::desc("Exhaustive Search for registers bypassing the depth "
                           "and interference cutoffs of last chance recoloring"));
 
+static cl::opt<bool> EnableLocalReassignment(
+    "enable-local-reassign", cl::Hidden,
+    cl::desc("Local reassignment can yield better allocation decisions, but "
+             "may be compile time intensive"),
+    cl::init(false));
+
 // FIXME: Find a good default for this flag and remove the flag.
 static cl::opt<unsigned>
 CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -285,6 +292,10 @@ class RAGreedy : public MachineFunctionPass,
   /// Callee-save register cost, calculated once per machine function.
   BlockFrequency CSRCost;
 
+  /// Run or not the local reassignment heuristic. This information is
+  /// obtained from the TargetSubtargetInfo.
+  bool EnableLocalReassign;
+
 public:
   RAGreedy();
 
@@ -731,7 +742,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
       // Evicting another local live range in this case could lead to suboptimal
       // coloring.
       if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
-          !canReassign(*Intf, PhysReg)) {
+          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
         return false;
       }
     }
@@ -2308,9 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
                << "********** Function: " << mf.getName() << '\n');
 
   MF = &mf;
-  TRI = MF->getTarget().getRegisterInfo();
-  TII = MF->getTarget().getInstrInfo();
+  const TargetMachine &TM = MF->getTarget();
+  TRI = TM.getRegisterInfo();
+  TII = TM.getInstrInfo();
   RCI.runOnMachineFunction(mf);
+
+  EnableLocalReassign = EnableLocalReassignment ||
+    TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+
   if (VerifyEnabled)
     MF->verify(this, "Before greedy register allocator");
 
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b2909e0..617e459 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
   }
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
 void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
                               const TargetRegisterInfo *TRI) {
   bool Empty = true;
@@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
     dbgs() << "\n";
 }
 
+LLVM_DUMP_METHOD
 void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << "Max Pressure: ";
   dumpRegSetPressure(MaxSetPressure, TRI);
@@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << '\n';
 }
 
+LLVM_DUMP_METHOD
 void RegPressureTracker::dump() const {
   if (!isTopClosed() || !isBottomClosed()) {
     dbgs() << "Curr Pressure: ";
@@ -75,7 +77,6 @@ void RegPressureTracker::dump() const {
   }
   P.dump(TRI);
 }
-#endif
 
 /// Increase the current pressure as impacted by these registers and bump
 /// the high water mark if needed.
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 92a9a30..0f8b21c 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1508,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
   }
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
 void ILPValue::print(raw_ostream &OS) const {
   OS << InstrCount << " / " << Length << " = ";
   if (!Length)
@@ -1517,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const {
     OS << format("%g", ((double)InstrCount / Length));
 }
 
+LLVM_DUMP_METHOD
 void ILPValue::dump() const {
   dbgs() << *this << '\n';
 }
 
 namespace llvm {
 
+LLVM_DUMP_METHOD
 raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
   Val.print(OS);
   return OS;
 }
 
 } // namespace llvm
-#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d2fd53..7c42e4d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -167,9 +167,18 @@ namespace {
 
     bool CombineToPreIndexedLoadStore(SDNode *N);
     bool CombineToPostIndexedLoadStore(SDNode *N);
-    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
     bool SliceUpLoad(SDNode *N);
 
+    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+    ///   load.
+    ///
+    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+    /// \param EltNo index of the vector element to load.
+    /// \param OriginalLoad load that EVE came from to be replaced.
+    /// \returns EVE on success SDValue() on failure.
+    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
     return CN;
 
   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
-    ConstantSDNode *CN = BV->getConstantSplatValue();
+    BitVector UndefElements;
+    ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
 
     // BuildVectors can truncate their operands. Ignore that case here.
-    if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+    // FIXME: We blindly ignore splats which include undef which is overly
+    // pessimistic.
+    if (CN && UndefElements.none() &&
+        CN->getValueType(0) == N.getValueType().getScalarType())
       return CN;
   }
 
@@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 
     // If the operands of this node are only used by the node, they will now
     // be dead.  Make sure to visit them first to delete dead nodes early.
-    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
-      SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
-      // For an operand generating multiple values, one of the values may
-      // become dead allowing further simplification (e.g. split index
-      // arithmetic from an indexed load).
-      if (Op->hasOneUse() || Op->getNumValues() > 1)
-        AddToWorkList(Op);
-    }
+    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+      if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+        AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
     DAG.DeleteNode(TLO.Old.getNode());
   }
 }
@@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) {
 
     // Constant operands are canonicalized to RHS.
     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
-      SDValue Ops[] = { N1, N0 };
-      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
-                                            Ops);
+      SDValue Ops[] = {N1, N0};
+      SDNode *CSENode;
+      if (const BinaryWithFlagsSDNode *BinNode =
+              dyn_cast<BinaryWithFlagsSDNode>(N)) {
+        CSENode = DAG.getNodeIfExists(
+            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+            BinNode->hasNoSignedWrap(), BinNode->isExact());
+      } else {
+        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+      }
       if (CSENode)
         return SDValue(CSENode, 0);
     }
@@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     // If setcc produces all-one true value then:
     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
     if (N1CV && N1CV->isConstant()) {
-      if (N0.getOpcode() == ISD::AND &&
-          TLI.getBooleanContents(true) ==
-          TargetLowering::ZeroOrNegativeOneBooleanContent) {
+      if (N0.getOpcode() == ISD::AND) {
         SDValue N00 = N0->getOperand(0);
         SDValue N01 = N0->getOperand(1);
         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
 
-        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+                TargetLowering::ZeroOrNegativeOneBooleanContent) {
           SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
           if (C.getNode())
             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
@@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   // fold (select C, 0, 1) -> (xor C, 1)
+  // We can't do this reliably if integer based booleans have different contents
+  // to floating point based booleans. This is because we can't tell whether we
+  // have an integer-based boolean or a floating-point-based boolean unless we
+  // can find the SETCC that produced it and inspect its operands. This is
+  // fairly easy if C is the SETCC node, but it can potentially be
+  // undiscoverable (or not reasonably discoverable). For example, it could be
+  // in another basic block or it could require searching a complicated
+  // expression.
   if (VT.isInteger() &&
-      (VT0 == MVT::i1 ||
-       (VT0.isInteger() &&
-        TLI.getBooleanContents(false) ==
-        TargetLowering::ZeroOrOneBooleanContent)) &&
+      (VT0 == MVT::i1 || (VT0.isInteger() &&
+                          TLI.getBooleanContents(false, false) ==
+                              TLI.getBooleanContents(false, true) &&
+                          TLI.getBooleanContents(false, false) ==
+                              TargetLowering::ZeroOrOneBooleanContent)) &&
       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
     SDValue XORNode;
     if (VT == VT0)
@@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
 
   // fold selects based on a setcc into other things, such as min/max/abs
   if (N0.getOpcode() == ISD::SETCC) {
-    // FIXME:
-    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-    // having to say they don't support SELECT_CC on every type the DAG knows
-    // about, since there is no way to mark an opcode illegal at all value types
-    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
-        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+    if ((!LegalOperations &&
+         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+	TLI.isOperationLegal(ISD::SELECT_CC, VT))
       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                          N0.getOperand(0), N0.getOperand(1),
                          N1, N2, N0.getOperand(2));
@@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   return std::make_pair(Lo, Hi);
 }
 
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue LHS = N->getOperand(1);
+  SDValue RHS = N->getOperand(2);
+  MVT VT = N->getSimpleValueType(0);
+  int NumElems = VT.getVectorNumElements();
+  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+  // We're sure we have an even number of elements due to the
+  // concat_vectors we have as arguments to vselect.
+  // Skip BV elements until we find one that's not an UNDEF
+  // After we find an UNDEF element, keep looping until we get to half the
+  // length of the BV and see if all the non-undef nodes are the same.
+  ConstantSDNode *BottomHalf = nullptr;
+  for (int i = 0; i < NumElems / 2; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (BottomHalf == nullptr)
+      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != BottomHalf)
+      return SDValue();
+  }
+
+  // Do the same for the second half of the BuildVector
+  ConstantSDNode *TopHalf = nullptr;
+  for (int i = NumElems / 2; i < NumElems; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (TopHalf == nullptr)
+      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != TopHalf)
+      return SDValue();
+  }
+
+  assert(TopHalf && BottomHalf &&
+         "One half of the selector was all UNDEFs and the other was all the "
+         "same value. This should have been addressed before this function.");
+  return DAG.getNode(
+      ISD::CONCAT_VECTORS, dl, VT,
+      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(N0.getNode()))
     return N2;
 
+  // The ConvertSelectToConcatVector function is assuming both the above
+  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
+  // and addressed.
+  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+      N2.getOpcode() == ISD::CONCAT_VECTORS &&
+      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+    SDValue CV = ConvertSelectToConcatVector(N, DAG);
+    if (CV.getNode())
+      return CV;
+  }
+
   return SDValue();
 }
 
@@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   }
 
   if (N0.getOpcode() == ISD::SETCC) {
+    EVT N0VT = N0.getOperand(0).getValueType();
     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
     // Only do this before legalize for now.
     if (VT.isVector() && !LegalOperations &&
-        TLI.getBooleanContents(true) ==
-          TargetLowering::ZeroOrNegativeOneBooleanContent) {
-      EVT N0VT = N0.getOperand(0).getValueType();
+        TLI.getBooleanContents(N0VT) ==
+            TargetLowering::ZeroOrNegativeOneBooleanContent) {
       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
       // of the same size as the compared operands. Only optimize sext(setcc())
       // if this is the case.
@@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       // Do not change the width of a volatile load.
       !cast<LoadSDNode>(N0)->isVolatile() &&
+      // Do not remove the cast if the types differ in endian layout.
+      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+      TLI.hasBigEndianPartOrdering(VT) &&
       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   }
 
   // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
         !VT.isVector() &&
@@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   }
 
   // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
 
     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   return false;
 }
 
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
-  ISD::MemIndexedMode AM = LD->getAddressingMode();
-  assert(AM != ISD::UNINDEXED);
-  SDValue BP = LD->getOperand(1);
-  SDValue Inc = LD->getOperand(2);
-  unsigned Opc =
-      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
-  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
-}
-
 SDValue DAGCombiner::visitLOAD(SDNode *N) {
   LoadSDNode *LD  = cast<LoadSDNode>(N);
   SDValue Chain = LD->getChain();
@@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
     } else {
       // Indexed loads.
       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
-      if (!N->hasAnyUseOfValue(0)) {
+      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
-        SDValue Index;
-        if (N->hasAnyUseOfValue(1)) {
-          Index = SplitIndexingFromLoad(LD);
-          // Try to fold the base pointer arithmetic into subsequent loads and
-          // stores.
-          AddUsersToWorkList(N);
-        } else
-          Index = DAG.getUNDEF(N->getValueType(1));
         DEBUG(dbgs() << "\nReplacing.7 ";
               N->dump(&DAG);
               dbgs() << "\nWith: ";
@@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
               dbgs() << " and 2 other values\n");
         WorkListRemover DeadNodes(*this);
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
-        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+                                      DAG.getUNDEF(N->getValueType(1)));
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
         removeFromWorkList(N);
         DAG.DeleteNode(N);
@@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     return SDValue();
   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
 
+  // Canonicalize insert_vector_elt dag nodes.
+  // Example:
+  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+  //
+  // Do this only if the child insert_vector node has one use; also
+  // do this only if indices are both constants and Idx1 < Idx0.
+  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+      && isa<ConstantSDNode>(InVec.getOperand(2))) {
+    unsigned OtherElt =
+      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+    if (Elt < OtherElt) {
+      // Swap nodes.
+      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+                                  InVec.getOperand(0), InVal, EltNo);
+      AddToWorkList(NewOp.getNode());
+      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+    }
+  }
+
   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   // vector elements.
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
 }
 
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+  EVT ResultVT = EVE->getValueType(0);
+  EVT VecEltVT = InVecVT.getVectorElementType();
+  unsigned Align = OriginalLoad->getAlignment();
+  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+      VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+    return SDValue();
+
+  Align = NewAlign;
+
+  SDValue NewPtr = OriginalLoad->getBasePtr();
+  SDValue Offset;
+  EVT PtrType = NewPtr.getValueType();
+  MachinePointerInfo MPI;
+  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+    int Elt = ConstEltNo->getZExtValue();
+    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+    if (TLI.isBigEndian())
+      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+    Offset = DAG.getConstant(PtrOff, PtrType);
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+  } else {
+    Offset = DAG.getNode(
+        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+    if (TLI.isBigEndian())
+      Offset = DAG.getNode(
+          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+    MPI = OriginalLoad->getPointerInfo();
+  }
+  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+  // The replacement we need to do here is a little tricky: we need to
+  // replace an extractelement of a load with a load.
+  // Use ReplaceAllUsesOfValuesWith to do the replacement.
+  // Note that this replacement assumes that the extractvalue is the only
+  // use of the load; that's okay because we don't want to perform this
+  // transformation in other cases anyway.
+  SDValue Load;
+  SDValue Chain;
+  if (ResultVT.bitsGT(VecEltVT)) {
+    // If the result type of vextract is wider than the load, then issue an
+    // extending load instead.
+    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+                                   ? ISD::ZEXTLOAD
+                                   : ISD::EXTLOAD;
+    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+                          NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+                          OriginalLoad->isNonTemporal(), Align,
+                          OriginalLoad->getTBAAInfo());
+    Chain = Load.getValue(1);
+  } else {
+    Load = DAG.getLoad(
+        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+        OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+    Chain = Load.getValue(1);
+    if (ResultVT.bitsLT(VecEltVT))
+      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+    else
+      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+  }
+  WorkListRemover DeadNodes(*this);
+  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+  SDValue To[] = { Load, Chain };
+  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+  // Since we're explicitly calling ReplaceAllUses, add the new node to the
+  // worklist explicitly as well.
+  AddToWorkList(Load.getNode());
+  AddUsersToWorkList(Load.getNode()); // Add users too
+  // Make sure to revisit this node to clean it up; it will usually be dead.
+  AddToWorkList(EVE);
+  ++OpsNarrowed;
+  return SDValue(EVE, 0);
+}
+
 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   // (vextract (scalar_to_vector val, 0) -> val
   SDValue InVec = N->getOperand(0);
@@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
+  bool BCNumEltsChanged = false;
+  EVT ExtVT = VT.getVectorElementType();
+  EVT LVT = ExtVT;
+
+  // If the result of load has to be truncated, then it's not necessarily
+  // profitable.
+  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+    return SDValue();
+
+  if (InVec.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!InVec.hasOneUse())
+      return SDValue();
+
+    EVT BCVT = InVec.getOperand(0).getValueType();
+    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+      return SDValue();
+    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+      BCNumEltsChanged = true;
+    InVec = InVec.getOperand(0);
+    ExtVT = BCVT.getVectorElementType();
+  }
+
+  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+      ISD::isNormalLoad(InVec.getNode())) {
+    SDValue Index = N->getOperand(1);
+    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+                                                           OrigLoad);
+  }
+
   // Perform only after legalization to ensure build_vector / vector_shuffle
   // optimizations have already been done.
   if (!LegalOperations) return SDValue();
@@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
 
   if (ConstEltNo) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-    bool NewLoad = false;
-    bool BCNumEltsChanged = false;
-    EVT ExtVT = VT.getVectorElementType();
-    EVT LVT = ExtVT;
-
-    // If the result of load has to be truncated, then it's not necessarily
-    // profitable.
-    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
-      return SDValue();
-
-    if (InVec.getOpcode() == ISD::BITCAST) {
-      // Don't duplicate a load with other uses.
-      if (!InVec.hasOneUse())
-        return SDValue();
-
-      EVT BCVT = InVec.getOperand(0).getValueType();
-      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
-        return SDValue();
-      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
-        BCNumEltsChanged = true;
-      InVec = InVec.getOperand(0);
-      ExtVT = BCVT.getVectorElementType();
-      NewLoad = true;
-    }
 
     LoadSDNode *LN0 = nullptr;
     const ShuffleVectorSDNode *SVN = nullptr;
@@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       if (ISD::isNormalLoad(InVec.getNode())) {
         LN0 = cast<LoadSDNode>(InVec);
         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
       }
     }
 
@@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     if (Elt == -1)
       return DAG.getUNDEF(LVT);
 
-    unsigned Align = LN0->getAlignment();
-    if (NewLoad) {
-      // Check the resultant load doesn't need a higher alignment than the
-      // original load.
-      unsigned NewAlign =
-        TLI.getDataLayout()
-            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
-      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
-        return SDValue();
-
-      Align = NewAlign;
-    }
-
-    SDValue NewPtr = LN0->getBasePtr();
-    unsigned PtrOff = 0;
-
-    if (Elt) {
-      PtrOff = LVT.getSizeInBits() * Elt / 8;
-      EVT PtrType = NewPtr.getValueType();
-      if (TLI.isBigEndian())
-        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
-      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
-                           DAG.getConstant(PtrOff, PtrType));
-    }
-
-    // The replacement we need to do here is a little tricky: we need to
-    // replace an extractelement of a load with a load.
-    // Use ReplaceAllUsesOfValuesWith to do the replacement.
-    // Note that this replacement assumes that the extractvalue is the only
-    // use of the load; that's okay because we don't want to perform this
-    // transformation in other cases anyway.
-    SDValue Load;
-    SDValue Chain;
-    if (NVT.bitsGT(LVT)) {
-      // If the result type of vextract is wider than the load, then issue an
-      // extending load instead.
-      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
-        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-      Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
-                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
-                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                            Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-    } else {
-      Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
-                         LN0->getPointerInfo().getWithOffset(PtrOff),
-                         LN0->isVolatile(), LN0->isNonTemporal(),
-                         LN0->isInvariant(), Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-      if (NVT.bitsLT(LVT))
-        Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
-      else
-        Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
-    }
-    WorkListRemover DeadNodes(*this);
-    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
-    SDValue To[] = { Load, Chain };
-    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
-    // Since we're explcitly calling ReplaceAllUses, add the new node to the
-    // worklist explicitly as well.
-    AddToWorkList(Load.getNode());
-    AddUsersToWorkList(Load.getNode()); // Add users too
-    // Make sure to revisit this node to clean it up; it will usually be dead.
-    AddToWorkList(N);
-    return SDValue(N, 0);
+    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
   }
 
   return SDValue();
@@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
     SmallVector<SDValue, 8> Opnds;
     unsigned BuildVecNumElts =  N0.getNumOperands();
 
-    for (unsigned i = 0; i != BuildVecNumElts; ++i)
-      Opnds.push_back(N0.getOperand(i));
-    for (unsigned i = 0; i != BuildVecNumElts; ++i)
-      Opnds.push_back(N1.getOperand(i));
+    EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+    EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+    if (SclTy0.isFloatingPoint()) {
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N0.getOperand(i));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N1.getOperand(i));
+    } else {
+      // If BUILD_VECTOR are from built from integer, they may have different
+      // operand types. Get the smaller type and truncate all operands to it.
+      EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N0.getOperand(i)));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N1.getOperand(i)));
+    }
 
     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
   }
@@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   }
 
   // If this shuffle node is simply a swizzle of another shuffle node,
-  // and it reverses the swizzle of the previous shuffle then we can
-  // optimize shuffle(shuffle(x, undef), undef) -> x.
+  // then try to simplify it.
   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
       N1.getOpcode() == ISD::UNDEF) {
 
     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
 
-    // Shuffle nodes can only reverse shuffles with a single non-undef value.
-    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
-      return SDValue();
-
     // The incoming shuffle must be of the same type as the result of the
     // current shuffle.
     assert(OtherSV->getOperand(0).getValueType() == VT &&
            "Shuffle types don't match");
 
+    SmallVector<int, 4> Mask;
+    // Compute the combined shuffle mask.
     for (unsigned i = 0; i != NumElts; ++i) {
       int Idx = SVN->getMaskElt(i);
       assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       // shuffle. Adopt the incoming index.
       if (Idx >= 0)
         Idx = OtherSV->getMaskElt(Idx);
+      Mask.push_back(Idx);
+    }
+    
+    bool CommuteOperands = false;
+    if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+      // To be valid, the combine shuffle mask should only reference elements
+      // from one of the two vectors in input to the inner shufflevector.
+      bool IsValidMask = true;
+      for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+        // See if the combined mask only reference undefs or elements coming
+        // from the first shufflevector operand.
+        IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+      if (!IsValidMask) {
+        IsValidMask = true;
+        for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+          // Check that all the elements come from the second shuffle operand.
+          IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+        CommuteOperands = IsValidMask;
+      }
 
-      // The combined shuffle must map each index to itself.
-      if (Idx >= 0 && (unsigned)Idx != i)
+      // Early exit if the combined shuffle mask is not valid.
+      if (!IsValidMask)
         return SDValue();
     }
 
-    return OtherSV->getOperand(0);
+    // See if this pair of shuffles can be safely folded according to either
+    // of the following rules:
+    //   shuffle(shuffle(x, y), undef) -> x
+    //   shuffle(shuffle(x, undef), undef) -> x
+    //   shuffle(shuffle(x, y), undef) -> y
+    bool IsIdentityMask = true;
+    unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+    for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+      // Skip Undefs.
+      if (Mask[i] < 0)
+        continue;
+
+      // The combined shuffle must map each index to itself.
+      IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+    }
+    
+    if (IsIdentityMask) {
+      if (CommuteOperands)
+        // optimize shuffle(shuffle(x, y), undef) -> y.
+        return OtherSV->getOperand(1);
+      
+      // optimize shuffle(shuffle(x, undef), undef) -> x
+      // optimize shuffle(shuffle(x, y), undef) -> x
+      return OtherSV->getOperand(0);
+    }
+
+    // It may still be beneficial to combine the two shuffles if the
+    // resulting shuffle is legal.
+    if (TLI.isShuffleMaskLegal(Mask, VT)) {
+      if (!CommuteOperands)
+        // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+        // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+        return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+                                    &Mask[0]);
+      
+      //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+      return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+                                  &Mask[0]);
+    }
   }
 
   return SDValue();
@@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
       return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
   }
 
+  // Type legalization might introduce new shuffles in the DAG.
+  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
+  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+      LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+      RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+    if (SVN0->getMask().equals(SVN1->getMask())) {
+      EVT VT = N->getValueType(0);
+      SDValue UndefVector = LHS.getOperand(1);
+      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                     LHS.getOperand(0), RHS.getOperand(0));
+      AddUsersToWorkList(N);
+      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+                                  &SVN0->getMask()[0]);
+    }
+  }
+
   return SDValue();
 }
 
@@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
 
   // fold select C, 16, 0 -> shl C, 4
   if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
-    TLI.getBooleanContents(N0.getValueType().isVector()) ==
-      TargetLowering::ZeroOrOneBooleanContent) {
+      TLI.getBooleanContents(N0.getValueType()) ==
+          TargetLowering::ZeroOrOneBooleanContent) {
 
     // If the caller doesn't want us to simplify this into a zext of a compare,
     // don't do it.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 99931c1..445572a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -42,12 +42,15 @@
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
@@ -558,6 +561,107 @@ bool FastISel::SelectGetElementPtr(const User *I) {
   return true;
 }
 
+/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands
+/// to a stackmap or patchpoint machine instruction.
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+                                   const CallInst *CI, unsigned StartIdx) {
+  for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+    Value *Val = CI->getArgOperand(i);
+    // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+    if (auto *C = dyn_cast<ConstantInt>(Val)) {
+      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+      Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+    } else if (isa<ConstantPointerNull>(Val)) {
+      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+      Ops.push_back(MachineOperand::CreateImm(0));
+    } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+      // Values coming from a stack location also require a sepcial encoding,
+      // but that is added later on by the target specific frame index
+      // elimination implementation.
+      auto SI = FuncInfo.StaticAllocaMap.find(AI);
+      if (SI != FuncInfo.StaticAllocaMap.end())
+        Ops.push_back(MachineOperand::CreateFI(SI->second));
+      else
+        return false;
+    } else {
+      unsigned Reg = getRegForValue(Val);
+      if (Reg == 0)
+        return false;
+      Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+    }
+  }
+
+  return true;
+}
+
+bool FastISel::SelectStackmap(const CallInst *I) {
+  // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+  //                                  [live variables...])
+  assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+         "Stackmap cannot return a value.");
+
+  // The stackmap intrinsic only records the live variables (the arguments
+  // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+  // intrinsic, this won't be lowered to a function call. This means we don't
+  // have to worry about calling conventions and target-specific lowering code.
+  // Instead we perform the call lowering right here.
+  //
+  // CALLSEQ_START(0)
+  // STACKMAP(id, nbytes, ...)
+  // CALLSEQ_END(0, 0)
+  //
+  SmallVector<MachineOperand, 32> Ops;
+
+  // Add the <id> and <numBytes> constants.
+  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+         "Expected a constant integer.");
+  const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+  Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+         "Expected a constant integer.");
+  const auto *NumBytes =
+    cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+  Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+  // Push live variables for the stack map (skipping the first two arguments
+  // <id> and <numBytes>).
+  if (!addStackMapLiveVars(Ops, I, 2))
+    return false;
+
+  // We are not adding any register mask info here, because the stackmap doesn't
+  // clobber anything.
+
+  // Add scratch registers as implicit def and early clobber.
+  CallingConv::ID CC = I->getCallingConv();
+  const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+  for (unsigned i = 0; ScratchRegs[i]; ++i)
+    Ops.push_back(MachineOperand::CreateReg(
+      ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+      /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+  // Issue CALLSEQ_START
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+    .addImm(0);
+
+  // Issue STACKMAP.
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                                    TII.get(TargetOpcode::STACKMAP));
+  for (auto const &MO : Ops)
+    MIB.addOperand(MO);
+
+  // Issue CALLSEQ_END
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+    .addImm(0).addImm(0);
+
+  // Inform the Frame Information that we have a stackmap in this function.
+  FuncInfo.MF->getFrameInfo()->setHasStackMap();
+
+  return true;
+}
+
 bool FastISel::SelectCall(const User *I) {
   const CallInst *Call = cast<CallInst>(I);
 
@@ -713,6 +817,8 @@ bool FastISel::SelectCall(const User *I) {
     UpdateValueMap(Call, ResultReg);
     return true;
   }
+  case Intrinsic::experimental_stackmap:
+    return SelectStackmap(Call);
   }
 
   // Usually, it does not make sense to initialize a value,
@@ -879,7 +985,6 @@ FastISel::SelectInstruction(const Instruction *I) {
 /// the CFG.
 void
 FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
-
   if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
       FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
     // For more accurate line information if this is the only instruction
@@ -890,7 +995,11 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
     TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
                      SmallVector<MachineOperand, 0>(), DbgLoc);
   }
-  FuncInfo.MBB->addSuccessor(MSucc);
+  uint32_t BranchWeight = 0;
+  if (FuncInfo.BPI)
+    BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
+                                               MSucc->getBasicBlock());
+  FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
 }
 
 /// SelectFNeg - Emit an FNeg operation.
@@ -1101,6 +1210,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
 FastISel::FastISel(FunctionLoweringInfo &funcInfo,
                    const TargetLibraryInfo *libInfo)
   : FuncInfo(funcInfo),
+    MF(funcInfo.MF),
     MRI(FuncInfo.MF->getRegInfo()),
     MFI(*FuncInfo.MF->getFrameInfo()),
     MCP(*FuncInfo.MF->getConstantPool()),
@@ -1635,3 +1745,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
   return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
 }
 
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+  const Value *Ptr;
+  Type *ValTy;
+  unsigned Alignment;
+  unsigned Flags;
+  bool IsVolatile;
+
+  if (const auto *LI = dyn_cast<LoadInst>(I)) {
+    Alignment = LI->getAlignment();
+    IsVolatile = LI->isVolatile();
+    Flags = MachineMemOperand::MOLoad;
+    Ptr = LI->getPointerOperand();
+    ValTy = LI->getType();
+  } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+    Alignment = SI->getAlignment();
+    IsVolatile = SI->isVolatile();
+    Flags = MachineMemOperand::MOStore;
+    Ptr = SI->getPointerOperand();
+    ValTy = SI->getValueOperand()->getType();
+  } else {
+    return nullptr;
+  }
+
+  bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
+  bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
+  const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
+  const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0.
+    Alignment = DL.getABITypeAlignment(ValTy);
+
+  unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
+
+  if (IsVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (IsNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+  if (IsInvariant)
+    Flags |= MachineMemOperand::MOInvariant;
+
+  return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+                                           Alignment, TBAAInfo, Ranges);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a59e895..c0e8c8c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2060,7 +2060,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2095,7 +2095,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2129,7 +2129,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2266,7 +2266,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
   SDLoc dl(Node);
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(InChain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2381,7 +2381,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(InChain)
     .setCallee(TLI.getLibcallCallingConv(LC),
-               Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0);
+               Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
 
@@ -2650,12 +2650,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
     NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
     assert(NewOutTy.isInteger() && "Ran out of possibilities!");
 
+    // A larger signed type can hold all unsigned values of the requested type,
+    // so using FP_TO_SINT is valid
     if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
       OpToUse = ISD::FP_TO_SINT;
       break;
     }
 
-    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+    // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
+    if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
       OpToUse = ISD::FP_TO_UINT;
       break;
     }
@@ -2996,8 +2999,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     TargetLowering::CallLoweringInfo CLI(DAG);
     CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
       .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                 DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()),
-                 &Args, 0);
+                 DAG.getExternalSymbol("__sync_synchronize",
+                 TLI.getPointerTy()), std::move(Args), 0);
 
     std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
 
@@ -3007,14 +3010,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   case ISD::ATOMIC_LOAD: {
     // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
     SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
-    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
-                                 cast<AtomicSDNode>(Node)->getMemoryVT(),
-                                 Node->getOperand(0),
-                                 Node->getOperand(1), Zero, Zero,
-                                 cast<AtomicSDNode>(Node)->getMemOperand(),
-                                 cast<AtomicSDNode>(Node)->getOrdering(),
-                                 cast<AtomicSDNode>(Node)->getOrdering(),
-                                 cast<AtomicSDNode>(Node)->getSynchScope());
+    SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+    SDValue Swap = DAG.getAtomicCmpSwap(
+        ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+        Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+        cast<AtomicSDNode>(Node)->getMemOperand(),
+        cast<AtomicSDNode>(Node)->getOrdering(),
+        cast<AtomicSDNode>(Node)->getOrdering(),
+        cast<AtomicSDNode>(Node)->getSynchScope());
     Results.push_back(Swap.getValue(0));
     Results.push_back(Swap.getValue(1));
     break;
@@ -3051,6 +3054,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(Tmp.second);
     break;
   }
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+    // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+    // splits out the success value as a comparison. Expanding the resulting
+    // ATOMIC_CMP_SWAP will produce a libcall.
+    SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+    SDValue Res = DAG.getAtomicCmpSwap(
+        ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+        Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+        Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
+        cast<AtomicSDNode>(Node)->getSuccessOrdering(),
+        cast<AtomicSDNode>(Node)->getFailureOrdering(),
+        cast<AtomicSDNode>(Node)->getSynchScope());
+
+    SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1),
+                                   Res, Node->getOperand(2), ISD::SETEQ);
+
+    Results.push_back(Res.getValue(0));
+    Results.push_back(Success);
+    Results.push_back(Res.getValue(1));
+    break;
+  }
   case ISD::DYNAMIC_STACKALLOC:
     ExpandDYNAMIC_STACKALLOC(Node, Results);
     break;
@@ -3074,7 +3098,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     TargetLowering::CallLoweringInfo CLI(DAG);
     CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
       .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                 DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0);
+                 DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+                 std::move(Args), 0);
     std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
 
     Results.push_back(CallResult.second);
@@ -3128,6 +3153,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                 Node->getOperand(0), Node->getValueType(0), dl);
     Results.push_back(Tmp1);
     break;
+  case ISD::FP_TO_SINT: {
+    EVT VT = Node->getOperand(0).getValueType();
+    EVT NVT = Node->getValueType(0);
+
+    // FIXME: Only f32 to i64 conversions are supported.
+    if (VT != MVT::f32 || NVT != MVT::i64)
+      break;
+
+    // Expand f32 -> i64 conversion
+    // This algorithm comes from compiler-rt's implementation of fixsfdi:
+    // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+                                  VT.getSizeInBits());
+    SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
+    SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
+    SDValue Bias = DAG.getConstant(127, IntVT);
+    SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+                                       IntVT);
+    SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
+    SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+
+    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+    SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
+        DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+        DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT)));
+    SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+        DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+        DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT)));
+    Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+    SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+        DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+        DAG.getConstant(0x00800000, IntVT));
+
+    R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+
+    R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
+       DAG.getNode(ISD::SHL, dl, NVT, R,
+                   DAG.getZExtOrTrunc(
+                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+                      dl, TLI.getShiftAmountTy(IntVT))),
+       DAG.getNode(ISD::SRL, dl, NVT, R,
+                   DAG.getZExtOrTrunc(
+                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+                      dl, TLI.getShiftAmountTy(IntVT))),
+       ISD::SETGT);
+
+    SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+        DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+        Sign);
+
+    Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
+        DAG.getConstant(0, NVT), Ret, ISD::SETLT));
+    break;
+  }
   case ISD::FP_TO_UINT: {
     SDValue True, False;
     EVT VT =  Node->getOperand(0).getValueType();
@@ -3653,7 +3737,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                               ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
                               LHS, RHS);
     Results.push_back(Sum);
-    EVT OType = Node->getValueType(1);
+    EVT ResultType = Node->getValueType(1);
+    EVT OType = getSetCCResultType(Node->getValueType(0));
 
     SDValue Zero = DAG.getConstant(0, LHS.getValueType());
 
@@ -3676,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
 
     SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
-    Results.push_back(Cmp);
+    Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
     break;
   }
   case ISD::UADDO:
@@ -3687,9 +3772,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                               ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
                               LHS, RHS);
     Results.push_back(Sum);
-    Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
-                                   Node->getOpcode () == ISD::UADDO ?
-                                   ISD::SETULT : ISD::SETUGT));
+
+    EVT ResultType = Node->getValueType(1);
+    EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+    ISD::CondCode CC
+      = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+    SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+    Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
     break;
   }
   case ISD::UMULO:
@@ -3879,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     // illegal; expand it into a SELECT_CC.
     EVT VT = Node->getValueType(0);
     int TrueValue;
-    switch (TLI.getBooleanContents(VT.isVector())) {
+    switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
     case TargetLowering::ZeroOrOneBooleanContent:
     case TargetLowering::UndefinedBooleanContent:
       TrueValue = 1;
@@ -3899,13 +3989,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp2 = Node->getOperand(1);   // RHS
     Tmp3 = Node->getOperand(2);   // True
     Tmp4 = Node->getOperand(3);   // False
+    EVT VT = Node->getValueType(0);
     SDValue CC = Node->getOperand(4);
+    ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+    if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+      // If the condition code is legal, then we need to expand this
+      // node using SETCC and SELECT.
+      EVT CmpVT = Tmp1.getValueType();
+      assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+             "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+             "expanded.");
+      EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT);
+      SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+      Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+      break;
+    }
 
+    // SELECT_CC is legal, so the condition code must not be.
     bool Legalized = false;
     // Try to legalize by inverting the condition.  This is for targets that
     // might support an ordered version of a condition, but not the unordered
     // version (or vice versa).
-    ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+    ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
                                                Tmp1.getValueType().isInteger());
     if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
       // Use the new condition code and swap true and false
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2483184..6feac0d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -138,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
     Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
 
   case ISD::ATOMIC_CMP_SWAP:
-    Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+    Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -192,16 +194,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
   return Res;
 }
 
-SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+                                                      unsigned ResNo) {
+  if (ResNo == 1) {
+    assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+    EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+    // Only use the result of getSetCCResultType if it is legal,
+    // otherwise just use the promoted result type (NVT).
+    if (!TLI.isTypeLegal(SVT))
+      SVT = NVT;
+
+    SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+    SDValue Res = DAG.getAtomicCmpSwap(
+        ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+        N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+        N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
+        N->getSynchScope());
+    ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+    ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+    return Res.getValue(1);
+  }
+
   SDValue Op2 = GetPromotedInteger(N->getOperand(2));
   SDValue Op3 = GetPromotedInteger(N->getOperand(3));
-  SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
-                              N->getChain(), N->getBasePtr(), Op2, Op3,
-                              N->getMemOperand(), N->getSuccessOrdering(),
-                              N->getFailureOrdering(), N->getSynchScope());
+  SDVTList VTs =
+      DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+  SDValue Res = DAG.getAtomicCmpSwap(
+      N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+      N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
+      N->getFailureOrdering(), N->getSynchScope());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
-  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  unsigned ChainOp = N->getNumValues() - 1;
+  ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp));
   return Res;
 }
 
@@ -492,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
   EVT OpTy = N->getOperand(1).getValueType();
 
   // Promote all the way up to the canonical SetCC type.
-  Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy));
+  Mask = PromoteTargetBoolean(Mask, OpTy);
   SDValue LHS = GetPromotedInteger(N->getOperand(1));
   SDValue RHS = GetPromotedInteger(N->getOperand(2));
   return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -892,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
   assert(OpNo == 1 && "only know how to promote condition");
 
   // Promote all the way up to the canonical SetCC type.
-  EVT SVT = getSetCCResultType(MVT::Other);
-  SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
 
   // The chain (Op#0) and basic block destination (Op#2) are always legal types.
   return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
@@ -986,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
   EVT OpTy = N->getOperand(1).getValueType();
 
   // Promote all the way up to the canonical SetCC type.
-  EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ?
-                                   OpTy.getScalarType() : OpTy);
-  Cond = PromoteTargetBoolean(Cond, SVT);
+  EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+  Cond = PromoteTargetBoolean(Cond, OpVT);
 
   return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
                                         N->getOperand(2)), 0);
@@ -1143,6 +1168,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
     ReplaceValueWith(SDValue(N, 1), Tmp.second);
     break;
   }
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+    AtomicSDNode *AN = cast<AtomicSDNode>(N);
+    SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+    SDValue Tmp = DAG.getAtomicCmpSwap(
+        ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+        N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+        AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
+        AN->getSynchScope());
+
+    // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+    // success simply by comparing the loaded value against the ingoing
+    // comparison.
+    SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+                                   N->getOperand(2), ISD::SETEQ);
+
+    SplitInteger(Tmp, Lo, Hi);
+    ReplaceValueWith(SDValue(N, 1), Success);
+    ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+    break;
+  }
 
   case ISD::AND:
   case ISD::OR:
@@ -2301,7 +2346,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(Chain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0)
     .setSExtResult();
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2388,16 +2433,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
                                                 SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
   EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+  SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
   SDValue Zero = DAG.getConstant(0, VT);
-  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
-                               N->getOperand(0),
-                               N->getOperand(1), Zero, Zero,
-                               cast<AtomicSDNode>(N)->getMemOperand(),
-                               cast<AtomicSDNode>(N)->getOrdering(),
-                               cast<AtomicSDNode>(N)->getOrdering(),
-                               cast<AtomicSDNode>(N)->getSynchScope());
+  SDValue Swap = DAG.getAtomicCmpSwap(
+      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+      cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+      N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
+      cast<AtomicSDNode>(N)->getOrdering(),
+      cast<AtomicSDNode>(N)->getOrdering(),
+      cast<AtomicSDNode>(N)->getSynchScope());
+
   ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
-  ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+  ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 3971fc3..bd7dacf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1054,7 +1054,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
 /// PromoteTargetBoolean - Promote the given target boolean to a target boolean
 /// of the given type.  A target boolean is an integer value, not necessarily of
 /// type i1, the bits of which conform to getBooleanContents.
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+///
+/// ValVT is the type of values that produced the boolean.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
   SDLoc dl(Bool);
+  EVT BoolVT = getSetCCResultType(ValVT);
   ISD::NodeType ExtendCode =
-    TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
-  return DAG.getNode(ExtendCode, dl, VT, Bool);
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+  return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
 }
 
 /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e4bbc78..d0ca6f8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -167,7 +167,7 @@ private:
                                                  SDNode *Node, bool isSigned);
   std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
 
-  SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+  SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
   void ReplaceValueWith(SDValue From, SDValue To);
   void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
   void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -220,7 +220,7 @@ private:
   SDValue PromoteIntRes_AssertZext(SDNode *N);
   SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
   SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
-  SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+  SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
   SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
@@ -570,6 +570,7 @@ private:
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
@@ -644,6 +645,7 @@ private:
   bool WidenVectorOperand(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_BITCAST(SDNode *N);
   SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTEND(SDNode *N);
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f40ed76..7e2f7b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
     case TargetLowering::TypeExpandFloat:
       // Convert the expanded pieces of the input.
       GetExpandedOp(InOp, Lo, Hi);
+      if (TLI.hasBigEndianPartOrdering(InVT) !=
+          TLI.hasBigEndianPartOrdering(OutVT))
+        std::swap(Lo, Hi);
       Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
       Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     case TargetLowering::TypeSplitVector:
       GetSplitVector(InOp, Lo, Hi);
-      if (TLI.isBigEndian())
+      if (TLI.hasBigEndianPartOrdering(OutVT))
         std::swap(Lo, Hi);
       Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
       Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
       EVT LoVT, HiVT;
       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
       std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
-      if (TLI.isBigEndian())
+      if (TLI.hasBigEndianPartOrdering(OutVT))
         std::swap(Lo, Hi);
       Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
       Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
                    false, false, MinAlign(Alignment, IncrementSize));
 
   // Handle endianness of the load.
-  if (TLI.isBigEndian())
+  if (TLI.hasBigEndianPartOrdering(OutVT))
     std::swap(Lo, Hi);
 }
 
@@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
   SDLoc dl(N);
 
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT ValueVT = LD->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
   unsigned Alignment = LD->getAlignment();
@@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
                       Hi.getValue(1));
 
   // Handle endianness of the load.
-  if (TLI.isBigEndian())
+  if (TLI.hasBigEndianPartOrdering(ValueVT))
     std::swap(Lo, Hi);
 
   // Modified the chain - switch anything that used the old chain to use
@@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
   Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
 
   // Handle endianness of the load.
-  if (TLI.isBigEndian())
+  if (TLI.hasBigEndianPartOrdering(OVT))
     std::swap(Lo, Hi);
 
   // Modified the chain - switch anything that used the old chain to use
@@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   SDLoc dl(N);
 
   StoreSDNode *St = cast<StoreSDNode>(N);
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
-                                     St->getValue().getValueType());
+  EVT ValueVT = St->getValue().getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
   SDValue Chain = St->getChain();
   SDValue Ptr = St->getBasePtr();
   unsigned Alignment = St->getAlignment();
@@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   SDValue Lo, Hi;
   GetExpandedOp(St->getValue(), Lo, Hi);
 
-  if (TLI.isBigEndian())
+  if (TLI.hasBigEndianPartOrdering(ValueVT))
     std::swap(Lo, Hi);
 
   Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 898cd29..507e7ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -37,12 +37,12 @@ class VectorLegalizer {
   const TargetLowering &TLI;
   bool Changed; // Keep track of whether anything changed
 
-  /// LegalizedNodes - For nodes that are of legal width, and that have more
-  /// than one use, this map indicates what regularized operand to use.  This
-  /// allows us to avoid legalizing the same thing more than once.
+  /// For nodes that are of legal width, and that have more than one use, this
+  /// map indicates what regularized operand to use.  This allows us to avoid
+  /// legalizing the same thing more than once.
   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
 
-  // Adds a node to the translation cache
+  /// \brief Adds a node to the translation cache.
   void AddLegalizedOperand(SDValue From, SDValue To) {
     LegalizedNodes.insert(std::make_pair(From, To));
     // If someone requests legalization of the new node, return itself.
@@ -50,41 +50,81 @@ class VectorLegalizer {
       LegalizedNodes.insert(std::make_pair(To, To));
   }
 
-  // Legalizes the given node
+  /// \brief Legalizes the given node.
   SDValue LegalizeOp(SDValue Op);
-  // Assuming the node is legal, "legalize" the results
+
+  /// \brief Assuming the node is legal, "legalize" the results.
   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
-  // Implements unrolling a VSETCC.
+
+  /// \brief Implements unrolling a VSETCC.
   SDValue UnrollVSETCC(SDValue Op);
-  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
-  // isn't legal.
-  // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
-  // SINT_TO_FLOAT and SHR on vectors isn't legal.
+
+  /// \brief Implement expand-based legalization of vector operations.
+  ///
+  /// This is just a high-level routine to dispatch to specific code paths for
+  /// operations to legalize them.
+  SDValue Expand(SDValue Op);
+
+  /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
+  /// FSUB isn't legal.
+  ///
+  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  /// SINT_TO_FLOAT and SHR on vectors isn't legal.
   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
-  // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+
+  /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
   SDValue ExpandSEXTINREG(SDValue Op);
-  // Expand bswap of vectors into a shuffle if legal.
+
+  /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+  /// type. The contents of the bits in the extended part of each element are
+  /// undef.
+  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+  /// type, then shifts left and arithmetic shifts right to introduce a sign
+  /// extension.
+  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place and blends zeros into
+  /// the remaining lanes, finally bitcasting to the proper type.
+  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// \brief Expand bswap of vectors into a shuffle if legal.
   SDValue ExpandBSWAP(SDValue Op);
-  // Implement vselect in terms of XOR, AND, OR when blend is not supported
-  // by the target.
+
+  /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+  /// supported by the target.
   SDValue ExpandVSELECT(SDValue Op);
   SDValue ExpandSELECT(SDValue Op);
   SDValue ExpandLoad(SDValue Op);
   SDValue ExpandStore(SDValue Op);
   SDValue ExpandFNEG(SDValue Op);
-  // Implements vector promotion; this is essentially just bitcasting the
-  // operands to a different type and bitcasting the result back to the
-  // original type.
-  SDValue PromoteVectorOp(SDValue Op);
-  // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
-  // operand to the next size up.
-  SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
-  // Implements FP_TO_[SU]INT vector promotion of the result type; it is
-  // promoted to the next size up integer type.  The result is then truncated
-  // back to the original type.
-  SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
-
-  public:
+
+  /// \brief Implements vector promotion.
+  ///
+  /// This is essentially just bitcasting the operands to a different type and
+  /// bitcasting the result back to the original type.
+  SDValue Promote(SDValue Op);
+
+  /// \brief Implements [SU]INT_TO_FP vector promotion.
+  ///
+  /// This is a [zs]ext of the input operand to the next size up.
+  SDValue PromoteINT_TO_FP(SDValue Op);
+
+  /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+  ///
+  /// It is promoted to the next size up integer type.  The result is then
+  /// truncated back to the original type.
+  SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+
+public:
+  /// \brief Begin legalizer the vector operations in the DAG.
   bool Run();
   VectorLegalizer(SelectionDAG& dag) :
       DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
@@ -254,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FP_EXTEND:
   case ISD::FMA:
   case ISD::SIGN_EXTEND_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
     QueryType = Node->getValueType(0);
     break;
   case ISD::FP_ROUND_INREG:
@@ -267,27 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
 
   switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
   case TargetLowering::Promote:
-    switch (Op.getOpcode()) {
-    default:
-      // "Promote" the operation by bitcasting
-      Result = PromoteVectorOp(Op);
-      Changed = true;
-      break;
-    case ISD::SINT_TO_FP:
-    case ISD::UINT_TO_FP:
-      // "Promote" the operation by extending the operand.
-      Result = PromoteVectorOpINT_TO_FP(Op);
-      Changed = true;
-      break;
-    case ISD::FP_TO_UINT:
-    case ISD::FP_TO_SINT:
-      // Promote the operation by extending the operand.
-      Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
-      Changed = true;
-      break;
-    }
+    Result = Promote(Op);
+    Changed = true;
+    break;
+  case TargetLowering::Legal:
     break;
-  case TargetLowering::Legal: break;
   case TargetLowering::Custom: {
     SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
     if (Tmp1.getNode()) {
@@ -297,23 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     // FALL THROUGH
   }
   case TargetLowering::Expand:
-    if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
-      Result = ExpandSEXTINREG(Op);
-    else if (Node->getOpcode() == ISD::BSWAP)
-      Result = ExpandBSWAP(Op);
-    else if (Node->getOpcode() == ISD::VSELECT)
-      Result = ExpandVSELECT(Op);
-    else if (Node->getOpcode() == ISD::SELECT)
-      Result = ExpandSELECT(Op);
-    else if (Node->getOpcode() == ISD::UINT_TO_FP)
-      Result = ExpandUINT_TO_FLOAT(Op);
-    else if (Node->getOpcode() == ISD::FNEG)
-      Result = ExpandFNEG(Op);
-    else if (Node->getOpcode() == ISD::SETCC)
-      Result = UnrollVSETCC(Op);
-    else
-      Result = DAG.UnrollVectorOp(Op.getNode());
-    break;
+    Result = Expand(Op);
   }
 
   // Make sure that the generated code is itself legal.
@@ -328,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   return Result;
 }
 
-SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
-  // Vector "promotion" is basically just bitcasting and doing the operation
-  // in a different type.  For example, x86 promotes ISD::AND on v2i32 to
-  // v1i64.
+SDValue VectorLegalizer::Promote(SDValue Op) {
+  // For a few operations there is a specific concept for promotion based on
+  // the operand's type.
+  switch (Op.getOpcode()) {
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    // "Promote" the operation by extending the operand.
+    return PromoteINT_TO_FP(Op);
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    // Promote the operation by extending the operand.
+    return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+  }
+
+  // The rest of the time, vector "promotion" is basically just bitcasting and
+  // doing the operation in a different type.  For example, x86 promotes
+  // ISD::AND on v2i32 to v1i64.
   MVT VT = Op.getSimpleValueType();
   assert(Op.getNode()->getNumValues() == 1 &&
          "Can't promote a vector with multiple results!");
@@ -351,7 +375,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
   return DAG.getNode(ISD::BITCAST, dl, VT, Op);
 }
 
-SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
   // INT_TO_FP operations may require the input operand be promoted even
   // when the type is otherwise legal.
   EVT VT = Op.getOperand(0).getValueType();
@@ -387,7 +411,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
 // elements and then truncate the result.  This is different from the default
 // PromoteVector which uses bitcast to promote thus assumning that the
 // promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
   assert(Op.getNode()->getNumValues() == 1 &&
          "Can't promote a vector with multiple results!");
   EVT VT = Op.getValueType();
@@ -609,6 +633,33 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
   return TF;
 }
 
+SDValue VectorLegalizer::Expand(SDValue Op) {
+  switch (Op->getOpcode()) {
+  case ISD::SIGN_EXTEND_INREG:
+    return ExpandSEXTINREG(Op);
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+    return ExpandANY_EXTEND_VECTOR_INREG(Op);
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+    return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+  case ISD::BSWAP:
+    return ExpandBSWAP(Op);
+  case ISD::VSELECT:
+    return ExpandVSELECT(Op);
+  case ISD::SELECT:
+    return ExpandSELECT(Op);
+  case ISD::UINT_TO_FP:
+    return ExpandUINT_TO_FLOAT(Op);
+  case ISD::FNEG:
+    return ExpandFNEG(Op);
+  case ISD::SETCC:
+    return UnrollVSETCC(Op);
+  default:
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+}
+
 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
   // Lower a select instruction where the condition is a scalar and the
   // operands are vectors. Lower this select to VSELECT and implement it
@@ -686,6 +737,85 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
 }
 
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  int NumSrcElements = SrcVT.getVectorNumElements();
+
+  // Build a base mask of undef shuffles.
+  SmallVector<int, 16> ShuffleMask;
+  ShuffleMask.resize(NumSrcElements, -1);
+
+  // Place the extended lanes into the correct locations.
+  int ExtLaneScale = NumSrcElements / NumElements;
+  int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+  return DAG.getNode(
+      ISD::BITCAST, DL, VT,
+      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // First build an any-extend node which can be legalized above when we
+  // recurse through it.
+  Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+
+  // Now we need sign extend. Do this by shifting the elements. Even if these
+  // aren't legal operations, they have a better chance of being legalized
+  // without full scalarization than the sign extension does.
+  unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+  unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
+  return DAG.getNode(ISD::SRA, DL, VT,
+                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+                     ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  int NumSrcElements = SrcVT.getVectorNumElements();
+
+  // Build up a zero vector to blend into this one.
+  EVT SrcScalarVT = SrcVT.getScalarType();
+  SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT);
+  SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
+  SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+
+  // Shuffle the incoming lanes into the correct position, and pull all other
+  // lanes from the zero vector.
+  SmallVector<int, 16> ShuffleMask;
+  ShuffleMask.reserve(NumSrcElements);
+  for (int i = 0; i < NumSrcElements; ++i)
+    ShuffleMask.push_back(i);
+
+  int ExtLaneScale = NumSrcElements / NumElements;
+  int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+  return DAG.getNode(ISD::BITCAST, DL, VT,
+                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
   EVT VT = Op.getValueType();
 
@@ -729,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
   // FIXME: Sign extend 1 to all ones if thats legal on the target.
   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
-      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
-      TLI.getBooleanContents(true) !=
-      TargetLowering::ZeroOrNegativeOneBooleanContent)
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getBooleanContents(Op1.getValueType()) !=
+          TargetLowering::ZeroOrNegativeOneBooleanContent)
     return DAG.UnrollVectorOp(Op.getNode());
 
   // If the mask and the type are different sizes, unroll the vector op. This
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 368eba3..f77c592 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
 SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
   SDValue Cond = GetScalarizedVector(N->getOperand(0));
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
-  TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
-  TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+  TargetLowering::BooleanContent ScalarBool =
+      TLI.getBooleanContents(false, false);
+  TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+  // If integer and float booleans have different contents then we can't
+  // reliably optimize in all cases. There is a full explanation for this in
+  // DAGCombiner::visitSELECT() where the same issue affects folding
+  // (select C, 0, 1) to (xor C, 1).
+  if (TLI.getBooleanContents(false, false) !=
+      TLI.getBooleanContents(false, true)) {
+    // At least try the common case where the boolean is generated by a
+    // comparison.
+    if (Cond->getOpcode() == ISD::SETCC) {
+      EVT OpVT = Cond->getOperand(0)->getValueType(0);
+      ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+      VecBool = TLI.getBooleanContents(OpVT);
+    } else
+      ScalarBool = TargetLowering::UndefinedBooleanContent;
+  }
+
   if (ScalarBool != VecBool) {
     EVT CondVT = Cond.getValueType();
     switch (ScalarBool) {
@@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
   // Vectors may have a different boolean contents to scalars.  Promote the
   // value appropriately.
   ISD::NodeType ExtendCode =
-    TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
   return DAG.getNode(ExtendCode, DL, NVT, Res);
 }
 
@@ -545,6 +563,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
   case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::INSERT_SUBVECTOR:  SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
   case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -765,6 +784,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                    TLI.getVectorIdxTy()));
 }
 
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  SDLoc dl(N);
+  GetSplitVector(Vec, Lo, Hi);
+
+  // Spill the vector to the stack.
+  EVT VecVT = Vec.getValueType();
+  EVT SubVecVT = VecVT.getVectorElementType();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+  // Store the new subvector into the specified index.
+  SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+  Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+  unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
+                       false, false, 0);
+
+  // Load the Lo part from the stack slot.
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, false, 0);
+
+  // Increment the pointer to the other part.
+  unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+  StackPtr =
+      DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                  DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+  // Load the Hi part from the stack slot.
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
                                          SDValue &Hi) {
   SDLoc dl(N);
@@ -1511,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 
   case ISD::ADD:
   case ISD::AND:
-  case ISD::BSWAP:
   case ISD::MUL:
   case ISD::MULHS:
   case ISD::MULHU:
@@ -1558,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_Convert(N);
     break;
 
+  case ISD::BSWAP:
   case ISD::CTLZ:
   case ISD::CTPOP:
   case ISD::CTTZ:
@@ -2343,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
   case ISD::SETCC:              Res = WidenVecOp_SETCC(N); break;
 
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+    Res = WidenVecOp_EXTEND(N);
+    break;
+
   case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
   case ISD::TRUNCATE:
-  case ISD::SIGN_EXTEND:
-  case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND:
     Res = WidenVecOp_Convert(N);
     break;
   }
@@ -2372,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   return false;
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue InOp = N->getOperand(0);
+  // If some legalization strategy other than widening is used on the operand,
+  // we can't safely assume that just extending the low lanes is the correct
+  // transformation.
+  if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+    return WidenVecOp_Convert(N);
+  InOp = GetWidenedVector(InOp);
+  assert(VT.getVectorNumElements() <
+             InOp.getValueType().getVectorNumElements() &&
+         "Input wasn't widened!");
+
+  // We may need to further widen the operand until it has the same total
+  // vector size as the result.
+  EVT InVT = InOp.getValueType();
+  if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+    EVT InEltVT = InVT.getVectorElementType();
+    for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
+      EVT FixedVT = (MVT::SimpleValueType)i;
+      EVT FixedEltVT = FixedVT.getVectorElementType();
+      if (TLI.isTypeLegal(FixedVT) &&
+          FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+          FixedEltVT == InEltVT) {
+        assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+               "Not enough elements in the fixed type for the operand!");
+        assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+               "We can't have the same type as we started with!");
+        if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+          InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+                             DAG.getUNDEF(FixedVT), InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        else
+          InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        break;
+      }
+    }
+    InVT = InOp.getValueType();
+    if (InVT.getSizeInBits() != VT.getSizeInBits())
+      // We couldn't find a legal vector type that was a widening of the input
+      // and could be extended in-register to the result type, so we have to
+      // scalarize.
+      return WidenVecOp_Convert(N);
+  }
+
+  // Use special DAG nodes to represent the operation of extending the
+  // low lanes.
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Extend legalization on on extend operation!");
+  case ISD::ANY_EXTEND:
+    return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+  case ISD::SIGN_EXTEND:
+    return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+  case ISD::ZERO_EXTEND:
+    return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+  }
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   // Since the result is legal and the input is illegal, it is unlikely
   // that we can fix the input to a legal type so unroll the convert
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index f92230c..624003f 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -442,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
     ResCount -= (regPressureDelta(SU) * ScaleTwo);
   }
 
-  // These are platform specific things.
+  // These are platform-specific things.
   // Will need to go into the back end
   // and accessed from here via a hook.
   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 78ec4df..13cfae7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -170,7 +170,8 @@ public:
     if (DisableSchedCycles || !NeedLatency)
       HazardRec = new ScheduleHazardRecognizer();
     else
-      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+          tm.getSubtargetImpl(), this);
   }
 
   ~ScheduleDAGRRList() {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 51c51d6..4589b0c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -73,7 +73,8 @@ public:
     : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
 
     const TargetMachine &tm = mf.getTarget();
-    HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+    HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+        tm.getSubtargetImpl(), this);
   }
 
   ~ScheduleDAGVLIW() {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b1b8035..daff1f2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Target/TargetSelectionDAGInfo.h"
 #include <algorithm>
 #include <cmath>
+
 using namespace llvm;
 
 /// makeVTList - Return an instance of the SDVTList struct initialized with the
@@ -147,33 +148,34 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
 
   if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
 
-  unsigned i = 0, e = N->getNumOperands();
-
-  // Skip over all of the undef values.
-  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
-    ++i;
+  bool IsAllUndef = true;
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF)
+      continue;
+    IsAllUndef = false;
+    // Do not accept build_vectors that aren't all constants or which have non-0
+    // elements. We have to be a bit careful here, as the type of the constant
+    // may not be the same as the type of the vector elements due to type
+    // legalization (the elements are promoted to a legal type for the target
+    // and a vector of a type may be legal when the base element type is not).
+    // We only want to check enough bits to cover the vector elements, because
+    // we care if the resultant vector is all zeros, not whether the individual
+    // constants are.
+    SDValue Zero = N->getOperand(i);
+    unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+      if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+        return false;
+    } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+      if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+        return false;
+    } else
+      return false;
+  }
 
   // Do not accept an all-undef vector.
-  if (i == e) return false;
-
-  // Do not accept build_vectors that aren't all constants or which have non-0
-  // elements.
-  SDValue Zero = N->getOperand(i);
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
-    if (!CN->isNullValue())
-      return false;
-  } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
-    if (!CFPN->getValueAPF().isPosZero())
-      return false;
-  } else
+  if (IsAllUndef)
     return false;
-
-  // Okay, we have at least one 0 value, check to see if the rest match or are
-  // undefs.
-  for (++i; i != e; ++i)
-    if (N->getOperand(i) != Zero &&
-        N->getOperand(i).getOpcode() != ISD::UNDEF)
-      return false;
   return true;
 }
 
@@ -381,6 +383,20 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
   }
 }
 
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw,
+                                  bool exact) {
+  ID.AddBoolean(nuw);
+  ID.AddBoolean(nsw);
+  ID.AddBoolean(exact);
+}
+
+/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode,
+                                  bool nuw, bool nsw, bool exact) {
+  if (isBinOpWithFlags(Opcode))
+    AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+}
+
 static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
                           SDVTList VTList, ArrayRef<SDValue> OpList) {
   AddNodeIDOpcode(ID, OpC);
@@ -473,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
     ID.AddInteger(ST->getPointerInfo().getAddrSpace());
     break;
   }
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::MUL:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::SHL: {
+    const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
+    AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(),
+                          BinNode->hasNoSignedWrap(), BinNode->isExact());
+    break;
+  }
   case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
@@ -527,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
   // Add the return value info.
   AddNodeIDValueTypes(ID, N->getVTList());
   // Add the operand info.
-  AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end()));
+  AddNodeIDOperands(ID, N->ops());
 
   // Handle SDNode leafs with special info.
   AddNodeIDCustom(ID, N);
@@ -926,6 +956,25 @@ void SelectionDAG::allnodes_clear() {
     DeallocateNode(AllNodes.begin());
 }
 
+BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
+                                            SDVTList VTs, SDValue N1,
+                                            SDValue N2, bool nuw, bool nsw,
+                                            bool exact) {
+  if (isBinOpWithFlags(Opcode)) {
+    BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
+        Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+    FN->setHasNoUnsignedWrap(nuw);
+    FN->setHasNoSignedWrap(nsw);
+    FN->setIsExact(exact);
+
+    return FN;
+  }
+
+  BinarySDNode *N = new (NodeAllocator)
+      BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+  return N;
+}
+
 void SelectionDAG::clear() {
   allnodes_clear();
   OperandAllocator.Reset();
@@ -963,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
     getNode(ISD::TRUNCATE, DL, VT, Op);
 }
 
-SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) {
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT,
+                                        EVT OpVT) {
   if (VT.bitsLE(Op.getValueType()))
     return getNode(ISD::TRUNCATE, SL, VT, Op);
 
-  TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector());
+  TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
   return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
 }
 
@@ -983,6 +1033,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
                  getConstant(Imm, Op.getValueType()));
 }
 
+SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+  assert(VT.isVector() && "This DAG node is restricted to vector types.");
+  assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+         "The sizes of the input and result must match in order to perform the "
+         "extend in-register.");
+  assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+         "The destination vector type must have fewer lanes than the input.");
+  return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+  assert(VT.isVector() && "This DAG node is restricted to vector types.");
+  assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+         "The sizes of the input and result must match in order to perform the "
+         "extend in-register.");
+  assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+         "The destination vector type must have fewer lanes than the input.");
+  return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+  assert(VT.isVector() && "This DAG node is restricted to vector types.");
+  assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+         "The sizes of the input and result must match in order to perform the "
+         "extend in-register.");
+  assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+         "The destination vector type must have fewer lanes than the input.");
+  return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
 /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
 ///
 SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
@@ -995,7 +1075,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
 SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
   EVT EltVT = VT.getScalarType();
   SDValue TrueValue;
-  switch (TLI->getBooleanContents(VT.isVector())) {
+  switch (TLI->getBooleanContents(VT)) {
     case TargetLowering::ZeroOrOneBooleanContent:
     case TargetLowering::UndefinedBooleanContent:
       TrueValue = getConstant(1, VT);
@@ -1190,15 +1270,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
   if (BitWidth < 64)
     Offset = SignExtend64(Offset, BitWidth);
 
-  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-  if (!GVar) {
-    // If GV is an alias then use the aliasee for determining thread-localness.
-    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-      GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
-  }
-
   unsigned Opc;
-  if (GVar && GVar->isThreadLocal())
+  if (GV->isThreadLocal())
     Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
   else
     Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
@@ -1454,6 +1527,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
     N1 = getUNDEF(VT);
     commuteShuffle(N1, N2, MaskVec);
   }
+  // Reset our undef status after accounting for the mask.
+  N2Undef = N2.getOpcode() == ISD::UNDEF;
+  // Re-check whether both sides ended up undef.
+  if (N1.getOpcode() == ISD::UNDEF && N2Undef)
+    return getUNDEF(VT);
 
   // If Identity shuffle return that node.
   bool Identity = true;
@@ -1464,9 +1542,36 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
     return N1;
 
   // Shuffling a constant splat doesn't change the result.
-  if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR)
-    if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue())
-      return N1;
+  if (N2Undef) {
+    SDValue V = N1;
+
+    // Look through any bitcasts. We check that these don't change the number
+    // (and size) of elements and just changes their types.
+    while (V.getOpcode() == ISD::BITCAST)
+      V = V->getOperand(0);
+
+    // A splat should always show up as a build vector node.
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      BitVector UndefElements;
+      SDValue Splat = BV->getSplatValue(&UndefElements);
+      // If this is a splat of an undef, shuffling it is also undef.
+      if (Splat && Splat.getOpcode() == ISD::UNDEF)
+        return getUNDEF(VT);
+
+      // We only have a splat which can skip shuffles if there is a splatted
+      // value and no undef lanes rearranged by the shuffle.
+      if (Splat && UndefElements.none()) {
+        // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+        // number of elements match or the value splatted is a zero constant.
+        if (V.getValueType().getVectorNumElements() ==
+            VT.getVectorNumElements())
+          return N1;
+        if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+          if (C->isNullValue())
+            return N1;
+      }
+    }
+  }
 
   FoldingSetNodeID ID;
   SDValue Ops[2] = { N1, N2 };
@@ -1692,7 +1797,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
   case ISD::SETTRUE:
   case ISD::SETTRUE2: {
     const TargetLowering *TLI = TM.getTargetLowering();
-    TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+    TargetLowering::BooleanContent Cnt =
+        TLI->getBooleanContents(N1->getValueType(0));
     return getConstant(
         Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
   }
@@ -1923,11 +2029,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
   case ISD::UMULO:
     if (Op.getResNo() != 1)
       break;
-    // The boolean result conforms to getBooleanContents.  Fall through.
+    // The boolean result conforms to getBooleanContents.
+    // If we know the result of a setcc has the top bits zero, use this info.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
+    if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+            TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    break;
   case ISD::SETCC:
     // If we know the result of a setcc has the top bits zero, use this info.
-    if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
-        TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+    if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+            TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
     break;
   case ISD::SHL:
@@ -2043,7 +2158,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       unsigned MemBits = VT.getScalarType().getSizeInBits();
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
     } else if (const MDNode *Ranges = LD->getRanges()) {
-      computeKnownBitsLoad(*Ranges, KnownZero);
+      computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
     }
     break;
   }
@@ -2192,8 +2307,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       const APInt &RA = Rem->getAPIntValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        KnownZero |= ~LowBits;
-        computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
+        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+
+        // The upper bits are all zero, the lower ones are unchanged.
+        KnownZero = KnownZero2 | ~LowBits;
+        KnownOne = KnownOne2 & LowBits;
         break;
       }
     }
@@ -2323,9 +2441,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (Op.getResNo() != 1)
       break;
     // The boolean result conforms to getBooleanContents.  Fall through.
+    // If setcc returns 0/-1, all bits are sign bits.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
+    if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      return VTBits;
+    break;
   case ISD::SETCC:
     // If setcc returns 0/-1, all bits are sign bits.
-    if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
+    if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
         TargetLowering::ZeroOrNegativeOneBooleanContent)
       return VTBits;
     break;
@@ -2940,7 +3065,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
-                              SDValue N2) {
+                              SDValue N2, bool nuw, bool nsw, bool exact) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   switch (Opcode) {
@@ -3380,22 +3505,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
   }
 
   // Memoize this node if possible.
-  SDNode *N;
+  BinarySDNode *N;
   SDVTList VTs = getVTList(VT);
+  const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
   if (VT != MVT::Glue) {
-    SDValue Ops[] = { N1, N2 };
+    SDValue Ops[] = {N1, N2};
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
+    if (BinOpHasFlags)
+      AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
     void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);
 
-    N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
-                                         DL.getDebugLoc(), VTs, N1, N2);
+    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+
     CSEMap.InsertNode(N, IP);
   } else {
-    N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
-                                         DL.getDebugLoc(), VTs, N1, N2);
+
+    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
   }
 
   AllNodes.push_back(N);
@@ -3583,7 +3711,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    else if (VT == MVT::f32 || VT == MVT::f64)
+    else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
       return DAG.getConstantFP(0.0, VT);
     else if (VT.isVector()) {
       unsigned NumElts = VT.getVectorNumElements();
@@ -4110,7 +4238,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
     .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
                Type::getVoidTy(*getContext()),
                getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
-                                 TLI->getPointerTy()), &Args, 0)
+                                 TLI->getPointerTy()), std::move(Args), 0)
     .setDiscardResult();
   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
 
@@ -4166,7 +4294,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
     .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
                Type::getVoidTy(*getContext()),
                getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
-                                 TLI->getPointerTy()), &Args, 0)
+                                 TLI->getPointerTy()), std::move(Args), 0)
     .setDiscardResult();
   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
 
@@ -4230,7 +4358,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
     .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
                Type::getVoidTy(*getContext()),
                getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
-                                 TLI->getPointerTy()), &Args, 0)
+                                 TLI->getPointerTy()), std::move(Args), 0)
     .setDiscardResult();
 
   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4281,51 +4409,47 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
                    Ordering, SynchScope);
 }
 
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDValue Chain, SDValue Ptr, SDValue Cmp,
-                                SDValue Swp, MachinePointerInfo PtrInfo,
-                                unsigned Alignment,
-                                AtomicOrdering SuccessOrdering,
-                                AtomicOrdering FailureOrdering,
-                                SynchronizationScope SynchScope) {
+SDValue SelectionDAG::getAtomicCmpSwap(
+    unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+    SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
+    unsigned Alignment, AtomicOrdering SuccessOrdering,
+    AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
 
   MachineFunction &MF = getMachineFunction();
 
-  // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
-  // For now, atomics are considered to be volatile always.
   // FIXME: Volatile isn't really correct; we should keep track of atomic
   // orderings in the memoperand.
   unsigned Flags = MachineMemOperand::MOVolatile;
-  if (Opcode != ISD::ATOMIC_STORE)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Opcode != ISD::ATOMIC_LOAD)
-    Flags |= MachineMemOperand::MOStore;
+  Flags |= MachineMemOperand::MOLoad;
+  Flags |= MachineMemOperand::MOStore;
 
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
 
-  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
-                   SuccessOrdering, FailureOrdering, SynchScope);
+  return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
+                          SuccessOrdering, FailureOrdering, SynchScope);
 }
 
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDValue Chain,
-                                SDValue Ptr, SDValue Cmp,
-                                SDValue Swp, MachineMemOperand *MMO,
-                                AtomicOrdering SuccessOrdering,
-                                AtomicOrdering FailureOrdering,
-                                SynchronizationScope SynchScope) {
-  assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
+                                       SDVTList VTs, SDValue Chain, SDValue Ptr,
+                                       SDValue Cmp, SDValue Swp,
+                                       MachineMemOperand *MMO,
+                                       AtomicOrdering SuccessOrdering,
+                                       AtomicOrdering FailureOrdering,
+                                       SynchronizationScope SynchScope) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
   assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
 
-  EVT VT = Cmp.getValueType();
-
-  SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering,
-                   FailureOrdering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
+                   SuccessOrdering, FailureOrdering, SynchScope);
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -5610,10 +5734,13 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
 /// getNodeIfExists - Get the specified node if it's already available, or
 /// else return NULL.
 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
-                                      ArrayRef<SDValue> Ops) {
-  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+                                      ArrayRef<SDValue> Ops, bool nuw, bool nsw,
+                                      bool exact) {
+  if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops);
+    if (isBinOpWithFlags(Opcode))
+      AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
     void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return E;
@@ -5960,7 +6087,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
   // count of outstanding operands.
   for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
     SDNode *N = I++;
-    checkForCycles(N);
+    checkForCycles(N, this);
     unsigned Degree = N->getNumOperands();
     if (Degree == 0) {
       // A node with no uses, add it to the result array immediately.
@@ -5980,7 +6107,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
   // such that by the time the end is reached all nodes will be sorted.
   for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
     SDNode *N = I;
-    checkForCycles(N);
+    checkForCycles(N, this);
     // N is in sorted position, so all its uses have one less operand
     // that needs to be sorted.
     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
@@ -6005,7 +6132,9 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
 #ifndef NDEBUG
       SDNode *S = ++I;
       dbgs() << "Overran sorted position:\n";
-      S->dumprFull();
+      S->dumprFull(this); dbgs() << "\n";
+      dbgs() << "Checking if this is due to cycles\n";
+      checkForCycles(this, true);
 #endif
       llvm_unreachable(nullptr);
     }
@@ -6554,16 +6683,43 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
   return true;
 }
 
-ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
-  SDValue Op0 = getOperand(0);
-  if (Op0.getOpcode() != ISD::Constant)
-    return nullptr;
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+  if (UndefElements) {
+    UndefElements->clear();
+    UndefElements->resize(getNumOperands());
+  }
+  SDValue Splatted;
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    SDValue Op = getOperand(i);
+    if (Op.getOpcode() == ISD::UNDEF) {
+      if (UndefElements)
+        (*UndefElements)[i] = true;
+    } else if (!Splatted) {
+      Splatted = Op;
+    } else if (Splatted != Op) {
+      return SDValue();
+    }
+  }
+
+  if (!Splatted) {
+    assert(getOperand(0).getOpcode() == ISD::UNDEF &&
+           "Can only have a splat without a constant for all undefs.");
+    return getOperand(0);
+  }
 
-  for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
-    if (getOperand(i) != Op0)
-      return nullptr;
+  return Splatted;
+}
 
-  return cast<ConstantSDNode>(Op0);
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+  return dyn_cast_or_null<ConstantSDNode>(
+      getSplatValue(UndefElements).getNode());
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+  return dyn_cast_or_null<ConstantFPSDNode>(
+      getSplatValue(UndefElements).getNode());
 }
 
 bool BuildVectorSDNode::isConstant() const {
@@ -6591,10 +6747,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   return true;
 }
 
-#ifdef XDEBUG
+#ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSet<const SDNode*, 32> &Visited,
-                                 SmallPtrSet<const SDNode*, 32> &Checked) {
+                                 SmallPtrSet<const SDNode*, 32> &Checked,
+                                 const llvm::SelectionDAG *DAG) {
   // If this node has already been checked, don't check it again.
   if (Checked.count(N))
     return;
@@ -6602,29 +6759,37 @@ static void checkForCyclesHelper(const SDNode *N,
   // If a node has already been visited on this depth-first walk, reject it as
   // a cycle.
   if (!Visited.insert(N)) {
-    dbgs() << "Offending node:\n";
-    N->dumprFull();
     errs() << "Detected cycle in SelectionDAG\n";
+    dbgs() << "Offending node:\n";
+    N->dumprFull(DAG); dbgs() << "\n";
     abort();
   }
 
   for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG);
 
   Checked.insert(N);
   Visited.erase(N);
 }
 #endif
 
-void llvm::checkForCycles(const llvm::SDNode *N) {
+void llvm::checkForCycles(const llvm::SDNode *N,
+                          const llvm::SelectionDAG *DAG,
+                          bool force) {
+#ifndef NDEBUG
+  bool check = force;
 #ifdef XDEBUG
-  assert(N && "Checking nonexistent SDNode");
-  SmallPtrSet<const SDNode*, 32> visited;
-  SmallPtrSet<const SDNode*, 32> checked;
-  checkForCyclesHelper(N, visited, checked);
-#endif
+  check = true;
+#endif  // XDEBUG
+  if (check) {
+    assert(N && "Checking nonexistent SDNode");
+    SmallPtrSet<const SDNode*, 32> visited;
+    SmallPtrSet<const SDNode*, 32> checked;
+    checkForCyclesHelper(N, visited, checked, DAG);
+  }
+#endif  // !NDEBUG
 }
 
-void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
-  checkForCycles(DAG->getRoot().getNode());
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+  checkForCycles(DAG->getRoot().getNode(), DAG, force);
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 070e929..28d8e98 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
       SDValue Lo, Hi;
       Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
       Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
-      if (TLI.isBigEndian())
+      if (TLI.hasBigEndianPartOrdering(ValueVT))
         std::swap(Lo, Hi);
       Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
     } else {
@@ -2784,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-  setValue(&I, DAG.getNode(OpCode, getCurSDLoc(),
-                           Op1.getValueType(), Op1, Op2));
+
+  bool nuw = false;
+  bool nsw = false;
+  bool exact = false;
+  if (const OverflowingBinaryOperator *OFBinOp =
+          dyn_cast<const OverflowingBinaryOperator>(&I)) {
+    nuw = OFBinOp->hasNoUnsignedWrap();
+    nsw = OFBinOp->hasNoSignedWrap();
+  }
+  if (const PossiblyExactOperator *ExactOp =
+          dyn_cast<const PossiblyExactOperator>(&I))
+    exact = ExactOp->isExact();
+
+  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+                                     Op1, Op2, nuw, nsw, exact);
+  setValue(&I, BinNodeValue);
 }
 
 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
@@ -2816,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
       Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
   }
 
-  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(),
-                           Op1.getValueType(), Op1, Op2));
+  bool nuw = false;
+  bool nsw = false;
+  bool exact = false;
+
+  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+    if (const OverflowingBinaryOperator *OFBinOp =
+            dyn_cast<const OverflowingBinaryOperator>(&I)) {
+      nuw = OFBinOp->hasNoUnsignedWrap();
+      nsw = OFBinOp->hasNoSignedWrap();
+    }
+    if (const PossiblyExactOperator *ExactOp =
+            dyn_cast<const PossiblyExactOperator>(&I))
+      exact = ExactOp->isExact();
+  }
+
+  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+                            nuw, nsw, exact);
+  setValue(&I, Res);
 }
 
 void SelectionDAGBuilder::visitSDiv(const User &I) {
@@ -3570,12 +3601,12 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
   if (Before) {
     if (Order == AcquireRelease || Order == SequentiallyConsistent)
       Order = Release;
-    else if (Order == Acquire || Order == Monotonic)
+    else if (Order == Acquire || Order == Monotonic || Order == Unordered)
       return Chain;
   } else {
     if (Order == AcquireRelease)
       Order = Acquire;
-    else if (Order == Release || Order == Monotonic)
+    else if (Order == Release || Order == Monotonic || Order == Unordered)
       return Chain;
   }
   SDValue Ops[3];
@@ -3598,19 +3629,17 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
     InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
                                    DAG, *TLI);
 
-  SDValue L =
-    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
-                  getValue(I.getCompareOperand()).getSimpleValueType(),
-                  InChain,
-                  getValue(I.getPointerOperand()),
-                  getValue(I.getCompareOperand()),
-                  getValue(I.getNewValOperand()),
-                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
-                  TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
-                  TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
-                  Scope);
+  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
+  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+  SDValue L = DAG.getAtomicCmpSwap(
+      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
+      getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
+      getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
+      0 /* Alignment */,
+      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+      TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
 
-  SDValue OutChain = L.getValue(1);
+  SDValue OutChain = L.getValue(2);
 
   if (TLI->getInsertFencesForAtomic())
     OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
@@ -5293,7 +5322,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     CLI.setDebugLoc(sdl).setChain(getRoot())
       .setCallee(CallingConv::C, I.getType(),
                  DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
-                 &Args, 0);
+                 std::move(Args), 0);
 
     std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
     DAG.setRoot(Result.second);
@@ -5410,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                       bool isTailCall,
                                       MachineBasicBlock *LandingPad) {
+  const TargetLowering *TLI = TM.getTargetLowering();
   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   Type *RetTy = FTy->getReturnType();
@@ -5420,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   TargetLowering::ArgListEntry Entry;
   Args.reserve(CS.arg_size());
 
-  // Check whether the function can return without sret-demotion.
-  SmallVector<ISD::OutputArg, 4> Outs;
-  const TargetLowering *TLI = TM.getTargetLowering();
-  GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI);
-
-  bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(),
-                                            DAG.getMachineFunction(),
-                                            FTy->isVarArg(), Outs,
-                                            FTy->getContext());
-
-  SDValue DemoteStackSlot;
-  int DemoteStackIdx = -100;
-
-  if (!CanLowerReturn) {
-    assert(!CS.hasInAllocaArgument() &&
-           "sret demotion is incompatible with inalloca");
-    uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
-                      FTy->getReturnType());
-    unsigned Align  = TLI->getDataLayout()->getPrefTypeAlignment(
-                      FTy->getReturnType());
-    MachineFunction &MF = DAG.getMachineFunction();
-    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
-
-    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy());
-    Entry.Node = DemoteStackSlot;
-    Entry.Ty = StackSlotPtrType;
-    Entry.isSExt = false;
-    Entry.isZExt = false;
-    Entry.isInReg = false;
-    Entry.isSRet = true;
-    Entry.isNest = false;
-    Entry.isByVal = false;
-    Entry.isReturned = false;
-    Entry.Alignment = Align;
-    Args.push_back(Entry);
-    RetTy = Type::getVoidTy(FTy->getContext());
-  }
-
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
     const Value *V = *i;
@@ -5499,58 +5490,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
 
   // Check if target-independent constraints permit a tail call here.
   // Target-dependent constraints are checked within TLI->LowerCallTo.
-  if (isTailCall && !isInTailCallPosition(CS, *TLI))
+  if (isTailCall && !isInTailCallPosition(CS, DAG))
     isTailCall = false;
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
-    .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall);
+    .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);
 
   std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
   assert((isTailCall || Result.second.getNode()) &&
          "Non-null chain expected with non-tail call!");
   assert((Result.second.getNode() || !Result.first.getNode()) &&
          "Null value expected with tail call!");
-  if (Result.first.getNode()) {
+  if (Result.first.getNode())
     setValue(CS.getInstruction(), Result.first);
-  } else if (!CanLowerReturn && Result.second.getNode()) {
-    // The instruction result is the result of loading from the
-    // hidden sret parameter.
-    SmallVector<EVT, 1> PVTs;
-    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
-
-    ComputeValueVTs(*TLI, PtrRetTy, PVTs);
-    assert(PVTs.size() == 1 && "Pointers should fit in one register");
-    EVT PtrVT = PVTs[0];
-
-    SmallVector<EVT, 4> RetTys;
-    SmallVector<uint64_t, 4> Offsets;
-    RetTy = FTy->getReturnType();
-    ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets);
-
-    unsigned NumValues = RetTys.size();
-    SmallVector<SDValue, 4> Values(NumValues);
-    SmallVector<SDValue, 4> Chains(NumValues);
-
-    for (unsigned i = 0; i < NumValues; ++i) {
-      SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT,
-                                DemoteStackSlot,
-                                DAG.getConstant(Offsets[i], PtrVT));
-      SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add,
-                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
-                              false, false, false, 1);
-      Values[i] = L;
-      Chains[i] = L.getValue(1);
-    }
-
-    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
-                                MVT::Other, Chains);
-    PendingLoads.push_back(Chain);
-
-    setValue(CS.getInstruction(),
-             DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
-                         DAG.getVTList(RetTys), Values));
-  }
 
   if (!Result.second.getNode()) {
     // As a special case, a null chain means that a tail call has been emitted
@@ -6845,7 +6798,7 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
   Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
-    .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs)
+    .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
     .setDiscardResult(!CI.use_empty());
 
   const TargetLowering *TLI = TM.getTargetLowering();
@@ -7092,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
   FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
 }
 
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+  SmallVector<Attribute::AttrKind, 2> Attrs;
+  if (CLI.RetSExt)
+    Attrs.push_back(Attribute::SExt);
+  if (CLI.RetZExt)
+    Attrs.push_back(Attribute::ZExt);
+  if (CLI.IsInReg)
+    Attrs.push_back(Attribute::InReg);
+
+  return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+                           Attrs);
+}
+
 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
 /// implementation, which just calls LowerCall.
 /// FIXME: When all targets are
@@ -7100,24 +7068,62 @@ std::pair<SDValue, SDValue>
 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
   // Handle the incoming return values from the call.
   CLI.Ins.clear();
+  Type *OrigRetTy = CLI.RetTy;
   SmallVector<EVT, 4> RetTys;
-  ComputeValueVTs(*this, CLI.RetTy, RetTys);
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
-      MyFlags.ArgVT = VT;
-      MyFlags.Used = CLI.IsReturnValueUsed;
-      if (CLI.RetSExt)
-        MyFlags.Flags.setSExt();
-      if (CLI.RetZExt)
-        MyFlags.Flags.setZExt();
-      if (CLI.IsInReg)
-        MyFlags.Flags.setInReg();
-      CLI.Ins.push_back(MyFlags);
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets);
+
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this);
+
+  bool CanLowerReturn =
+      this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+                           CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+  SDValue DemoteStackSlot;
+  int DemoteStackIdx = -100;
+  if (!CanLowerReturn) {
+    // FIXME: equivalent assert?
+    // assert(!CS.hasInAllocaArgument() &&
+    //        "sret demotion is incompatible with inalloca");
+    uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy);
+    unsigned Align  = getDataLayout()->getPrefTypeAlignment(CLI.RetTy);
+    MachineFunction &MF = CLI.DAG.getMachineFunction();
+    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+
+    DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy());
+    ArgListEntry Entry;
+    Entry.Node = DemoteStackSlot;
+    Entry.Ty = StackSlotPtrType;
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isInReg = false;
+    Entry.isSRet = true;
+    Entry.isNest = false;
+    Entry.isByVal = false;
+    Entry.isReturned = false;
+    Entry.Alignment = Align;
+    CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+    CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+  } else {
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        ISD::InputArg MyFlags;
+        MyFlags.VT = RegisterVT;
+        MyFlags.ArgVT = VT;
+        MyFlags.Used = CLI.IsReturnValueUsed;
+        if (CLI.RetSExt)
+          MyFlags.Flags.setSExt();
+        if (CLI.RetZExt)
+          MyFlags.Flags.setZExt();
+        if (CLI.IsInReg)
+          MyFlags.Flags.setInReg();
+        CLI.Ins.push_back(MyFlags);
+      }
     }
   }
 
@@ -7260,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                  "LowerCall emitted a value with the wrong type!");
         });
 
-  // Collect the legal value parts into potentially illegal values
-  // that correspond to the original function's return values.
-  ISD::NodeType AssertOp = ISD::DELETED_NODE;
-  if (CLI.RetSExt)
-    AssertOp = ISD::AssertSext;
-  else if (CLI.RetZExt)
-    AssertOp = ISD::AssertZext;
   SmallVector<SDValue, 4> ReturnValues;
-  unsigned CurReg = 0;
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-
-    ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
-                                            NumRegs, RegisterVT, VT, nullptr,
-                                            AssertOp));
-    CurReg += NumRegs;
-  }
-
-  // For a function returning void, there is no return value. We can't create
-  // such a node, so we just return a null return value in that case. In
-  // that case, nothing will actually look at the value.
-  if (ReturnValues.empty())
-    return std::make_pair(SDValue(), CLI.Chain);
+  if (!CanLowerReturn) {
+    // The instruction result is the result of loading from the
+    // hidden sret parameter.
+    SmallVector<EVT, 1> PVTs;
+    Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+
+    ComputeValueVTs(*this, PtrRetTy, PVTs);
+    assert(PVTs.size() == 1 && "Pointers should fit in one register");
+    EVT PtrVT = PVTs[0];
+
+    unsigned NumValues = RetTys.size();
+    ReturnValues.resize(NumValues);
+    SmallVector<SDValue, 4> Chains(NumValues);
+
+    for (unsigned i = 0; i < NumValues; ++i) {
+      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+                                    CLI.DAG.getConstant(Offsets[i], PtrVT));
+      SDValue L = CLI.DAG.getLoad(
+          RetTys[i], CLI.DL, CLI.Chain, Add,
+          MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
+          false, false, 1);
+      ReturnValues[i] = L;
+      Chains[i] = L.getValue(1);
+    }
+
+    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+  } else {
+    // Collect the legal value parts into potentially illegal values
+    // that correspond to the original function's return values.
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    if (CLI.RetSExt)
+      AssertOp = ISD::AssertSext;
+    else if (CLI.RetZExt)
+      AssertOp = ISD::AssertZext;
+    unsigned CurReg = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+                                              NumRegs, RegisterVT, VT, nullptr,
+                                              AssertOp));
+      CurReg += NumRegs;
+    }
+
+    // For a function returning void, there is no return value. We can't create
+    // such a node, so we just return a null return value in that case. In
+    // that case, nothing will actually look at the value.
+    if (ReturnValues.empty())
+      return std::make_pair(SDValue(), CLI.Chain);
+  }
 
   SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
                                 CLI.DAG.getVTList(RetTys), ReturnValues);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index fb29691..84679f9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -320,7 +320,7 @@ private:
   ///   1. Preserve the architecture independence of stack protector generation.
   ///
   ///   2. Preserve the normal IR level stack protector check for platforms like
-  ///      OpenBSD for which we support platform specific stack protector
+  ///      OpenBSD for which we support platform-specific stack protector
   ///      generation.
   ///
   /// The main problem that guided the present solution is that one can not
@@ -338,7 +338,7 @@ private:
   ///      basic block (where the return inst is placed) and then move it back
   ///      later at SelectionDAG/MI time before the stack protector check if the
   ///      tail call optimization failed. The MI level option was nixed
-  ///      immediately since it would require platform specific pattern
+  ///      immediately since it would require platform-specific pattern
   ///      matching. The SelectionDAG level option was nixed because
   ///      SelectionDAG only processes one IR level basic block at a time
   ///      implying one could not create a DAG Combine to move the callinst.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d6b5255..b3a452f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -55,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::PREFETCH:                   return "Prefetch";
   case ISD::ATOMIC_FENCE:               return "AtomicFence";
   case ISD::ATOMIC_CMP_SWAP:            return "AtomicCmpSwap";
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
   case ISD::ATOMIC_SWAP:                return "AtomicSwap";
   case ISD::ATOMIC_LOAD_ADD:            return "AtomicLoadAdd";
   case ISD::ATOMIC_LOAD_SUB:            return "AtomicLoadSub";
@@ -220,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ZERO_EXTEND:                return "zero_extend";
   case ISD::ANY_EXTEND:                 return "any_extend";
   case ISD::SIGN_EXTEND_INREG:          return "sign_extend_inreg";
+  case ISD::ANY_EXTEND_VECTOR_INREG:    return "any_extend_vector_inreg";
+  case ISD::SIGN_EXTEND_VECTOR_INREG:   return "sign_extend_vector_inreg";
+  case ISD::ZERO_EXTEND_VECTOR_INREG:   return "zero_extend_vector_inreg";
   case ISD::TRUNCATE:                   return "truncate";
   case ISD::FP_ROUND:                   return "fp_round";
   case ISD::FLT_ROUNDS_:                return "flt_rounds";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 472fc9c..57e22e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
 STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
 STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
 STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+
+// Intrinsic instructions...
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
+STATISTIC(NumFastIselFailSAddWithOverflow,
+          "Fast isel fails on sadd.with.overflow");
+STATISTIC(NumFastIselFailUAddWithOverflow,
+          "Fast isel fails on uadd.with.overflow");
+STATISTIC(NumFastIselFailSSubWithOverflow,
+          "Fast isel fails on ssub.with.overflow");
+STATISTIC(NumFastIselFailUSubWithOverflow,
+          "Fast isel fails on usub.with.overflow");
+STATISTIC(NumFastIselFailSMulWithOverflow,
+          "Fast isel fails on smul.with.overflow");
+STATISTIC(NumFastIselFailUMulWithOverflow,
+          "Fast isel fails on umul.with.overflow");
+STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
+STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
+STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
+STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
 #endif
 
 static cl::opt<bool>
@@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) {
   case Instruction::FCmp:           NumFastIselFailFCmp++; return;
   case Instruction::PHI:            NumFastIselFailPHI++; return;
   case Instruction::Select:         NumFastIselFailSelect++; return;
-  case Instruction::Call:           NumFastIselFailCall++; return;
+  case Instruction::Call: {
+    if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+      switch (Intrinsic->getIntrinsicID()) {
+      default:
+        NumFastIselFailIntrinsicCall++; return;
+      case Intrinsic::sadd_with_overflow:
+        NumFastIselFailSAddWithOverflow++; return;
+      case Intrinsic::uadd_with_overflow:
+        NumFastIselFailUAddWithOverflow++; return;
+      case Intrinsic::ssub_with_overflow:
+        NumFastIselFailSSubWithOverflow++; return;
+      case Intrinsic::usub_with_overflow:
+        NumFastIselFailUSubWithOverflow++; return;
+      case Intrinsic::smul_with_overflow:
+        NumFastIselFailSMulWithOverflow++; return;
+      case Intrinsic::umul_with_overflow:
+        NumFastIselFailUMulWithOverflow++; return;
+      case Intrinsic::frameaddress:
+        NumFastIselFailFrameaddress++; return;
+      case Intrinsic::sqrt:
+          NumFastIselFailSqrt++; return;
+      case Intrinsic::experimental_stackmap:
+        NumFastIselFailStackMap++; return;
+      case Intrinsic::experimental_patchpoint_void: // fall-through
+      case Intrinsic::experimental_patchpoint_i64:
+        NumFastIselFailPatchPoint++; return;
+      }
+    }
+    NumFastIselFailCall++;
+    return;
+  }
   case Instruction::Shl:            NumFastIselFailShl++; return;
   case Instruction::LShr:           NumFastIselFailLShr++; return;
   case Instruction::AShr:           NumFastIselFailAShr++; return;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b75d805..42372a2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -105,7 +105,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
-    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
     .setSExtResult(isSigned).setZExtResult(!isSigned);
   return LowerCallTo(CLI);
@@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
   assert(Op.getNode()->getNumValues() == 1 &&
          "ShrinkDemandedOp only supports nodes with one result!");
 
+  // Early return, as this function cannot handle vector types.
+  if (Op.getValueType().isVector())
+    return false;
+
   // Don't do this if the node has another user, which may require the
   // full value.
   if (!Op.getNode()->hasOneUse())
@@ -1146,18 +1150,21 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
   if (!N)
     return false;
 
-  bool IsVec = false;
   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
   if (!CN) {
     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
     if (!BV)
       return false;
 
-    IsVec = true;
-    CN = BV->getConstantSplatValue();
+    BitVector UndefElements;
+    CN = BV->getConstantSplatNode(&UndefElements);
+    // Only interested in constant splats, and we don't try to handle undef
+    // elements in identifying boolean constants.
+    if (!CN || UndefElements.none())
+      return false;
   }
 
-  switch (getBooleanContents(IsVec)) {
+  switch (getBooleanContents(N->getValueType(0))) {
   case UndefinedBooleanContent:
     return CN->getAPIntValue()[0];
   case ZeroOrOneBooleanContent:
@@ -1173,18 +1180,21 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
   if (!N)
     return false;
 
-  bool IsVec = false;
   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
   if (!CN) {
     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
     if (!BV)
       return false;
 
-    IsVec = true;
-    CN = BV->getConstantSplatValue();
+    BitVector UndefElements;
+    CN = BV->getConstantSplatNode(&UndefElements);
+    // Only interested in constant splats, and we don't try to handle undef
+    // elements in identifying boolean constants.
+    if (!CN || UndefElements.none())
+      return false;
   }
 
-  if (getBooleanContents(IsVec) == UndefinedBooleanContent)
+  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
     return !CN->getAPIntValue()[0];
 
   return CN->isNullValue();
@@ -1205,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   case ISD::SETFALSE2: return DAG.getConstant(0, VT);
   case ISD::SETTRUE:
   case ISD::SETTRUE2: {
-    TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+    TargetLowering::BooleanContent Cnt =
+        getBooleanContents(N0->getValueType(0));
     return DAG.getConstant(
         Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
   }
@@ -1412,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                           NewConst, Cond);
-          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT);
+          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
         }
         break;
       }
@@ -1496,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         }
       } else if (N1C->getAPIntValue() == 1 &&
                  (VT == MVT::i1 ||
-                  getBooleanContents(false) == ZeroOrOneBooleanContent)) {
+                  getBooleanContents(N0->getValueType(0)) ==
+                      ZeroOrOneBooleanContent)) {
         SDValue Op0 = N0;
         if (Op0.getOpcode() == ISD::TRUNCATE)
           Op0 = Op0.getOperand(0);
@@ -1767,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     // The sext(setcc()) => setcc() optimization relies on the appropriate
     // constant being emitted.
     uint64_t EqVal = 0;
-    switch (getBooleanContents(N0.getValueType().isVector())) {
+    switch (getBooleanContents(N0.getValueType())) {
     case UndefinedBooleanContent:
     case ZeroOrOneBooleanContent:
       EqVal = ISD::isTrueWhenEqual(Cond);
@@ -2613,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
   if (ShAmt) {
     // TODO: For UDIV use SRL instead of SRA.
     SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
-    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
+                      true);
     d = d.ashr(ShAmt);
   }
 
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index 1120be8..0e89bad 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -15,8 +15,8 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
-  : DL(TM.getDataLayout()) {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL)
+  : DL(DL) {
 }
 
 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 4dd87dd..3ba502f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -28,10 +28,9 @@ using namespace llvm;
 #define DEBUG_TYPE "stackmaps"
 
 namespace llvm {
-cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
-  cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
 cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
-  cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+  cl::Hidden, cl::init(true),
+  cl::desc("Enable PatchPoint Liveness Analysis Pass"));
 }
 
 STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
@@ -62,15 +61,17 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
 
 /// Calculate the liveness information for the given machine function.
 bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+  if (!EnablePatchPointLiveness)
+    return false;
+
   DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
                << _MF.getName() << " **********\n");
   MF = &_MF;
   TRI = MF->getTarget().getRegisterInfo();
   ++NumStackMapFuncVisited;
 
-  // Skip this function if there are no stackmaps or patchpoints to process.
-  if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
-        (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+  // Skip this function if there are no patchpoints to process.
+  if (!MF->getFrameInfo()->hasPatchPoint()) {
     ++NumStackMapFuncSkipped;
     return false;
   }
@@ -88,13 +89,10 @@ bool StackMapLiveness::calculateLiveness() {
     LiveRegs.addLiveOuts(MBBI);
     bool HasStackMap = false;
     // Reverse iterate over all instructions and add the current live register
-    // set to an instruction if we encounter a stackmap or patchpoint
-    // instruction.
+    // set to an instruction if we encounter a patchpoint instruction.
     for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(),
          E = MBBI->rend(); I != E; ++I) {
-      int Opc = I->getOpcode();
-      if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) ||
-          (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) {
+      if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
         addLiveOutSetToMI(*I);
         HasChanged = true;
         HasStackMap = true;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index c3f84c6..83966bd0 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -671,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const {
 
 // Default implementation of CreateTargetRAHazardRecognizer.
 ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
+CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                              const ScheduleDAG *DAG) const {
   // Dummy hazard recognizer allows all instructions to issue.
   return new ScheduleHazardRecognizer();
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2634d71..c574fd4 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
 
 /// InitLibcallNames - Set default libcall names.
 ///
-static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+static void InitLibcallNames(const char **Names, const Triple &TT) {
   Names[RTLIB::SHL_I16] = "__ashlhi3";
   Names[RTLIB::SHL_I32] = "__ashlsi3";
   Names[RTLIB::SHL_I64] = "__ashldi3";
@@ -384,7 +384,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
   Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
   Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
   
-  if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+  if (TT.getEnvironment() == Triple::GNU) {
     Names[RTLIB::SINCOS_F32] = "sincosf";
     Names[RTLIB::SINCOS_F64] = "sincos";
     Names[RTLIB::SINCOS_F80] = "sincosl";
@@ -399,7 +399,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
     Names[RTLIB::SINCOS_PPCF128] = nullptr;
   }
 
-  if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+  if (TT.getOS() != Triple::OpenBSD) {
     Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
   } else {
     // These are generally not available.
@@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
   ExceptionPointerRegister = 0;
   ExceptionSelectorRegister = 0;
   BooleanContents = UndefinedBooleanContent;
+  BooleanFloatContents = UndefinedBooleanContent;
   BooleanVectorContents = UndefinedBooleanContent;
   SchedPreferenceInfo = Sched::ILP;
   JumpBufSize = 0;
@@ -702,7 +703,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
   SupportJumpTables = true;
   MinimumJumpTableEntries = 4;
 
-  InitLibcallNames(LibcallRoutineNames, TM);
+  InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
 }
@@ -730,6 +731,10 @@ void TargetLoweringBase::initActions() {
       setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
     }
 
+    // Most backends expect to see the node which just returns the value loaded.
+    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+                       (MVT::SimpleValueType)VT, Expand);
+
     // These operations default to expand.
     setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -739,8 +744,15 @@ void TargetLoweringBase::initActions() {
 
     // These operations default to expand for vector types.
     if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
-        VT <= MVT::LAST_VECTOR_VALUETYPE)
+        VT <= MVT::LAST_VECTOR_VALUETYPE) {
       setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
+                         (MVT::SimpleValueType)VT, Expand);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
+                         (MVT::SimpleValueType)VT, Expand);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
+                         (MVT::SimpleValueType)VT, Expand);
+    }
   }
 
   // Most targets ignore the @llvm.prefetch intrinsic.
@@ -1080,24 +1092,25 @@ void TargetLoweringBase::computeRegisterProperties() {
   // Loop over all of the vector value types to see which need transformations.
   for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
-    if (isTypeLegal(VT)) continue;
+    MVT VT = (MVT::SimpleValueType) i;
+    if (isTypeLegal(VT))
+      continue;
 
-    // Determine if there is a legal wider type.  If so, we should promote to
-    // that wider vector type.
     MVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1 && !shouldSplitVectorType(VT)) {
-      bool IsLegalWiderType = false;
-      // First try to promote the elements of integer vectors. If no legal
-      // promotion was found, fallback to the widen-vector method.
-      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        MVT SVT = (MVT::SimpleValueType)nVT;
+    bool IsLegalWiderType = false;
+    LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+    switch (PreferredAction) {
+    case TypePromoteInteger: {
+      // Try to promote the elements of integer vectors. If no legal
+      // promotion was found, fall through to the widen-vector method.
+      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType) nVT;
         // Promote vectors of integers to vectors with the same number
         // of elements, with a wider element type.
         if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
-            && SVT.getVectorNumElements() == NElts &&
-            isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+            && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
+            && SVT.getScalarType().isInteger()) {
           TransformToType[i] = SVT;
           RegisterTypeForVT[i] = SVT;
           NumRegistersForVT[i] = 1;
@@ -1106,15 +1119,15 @@ void TargetLoweringBase::computeRegisterProperties() {
           break;
         }
       }
-
-      if (IsLegalWiderType) continue;
-
+      if (IsLegalWiderType)
+        break;
+    }
+    case TypeWidenVector: {
       // Try to widen the vector.
-      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        MVT SVT = (MVT::SimpleValueType)nVT;
-        if (SVT.getVectorElementType() == EltVT &&
-            SVT.getVectorNumElements() > NElts &&
-            isTypeLegal(SVT)) {
+      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        MVT SVT = (MVT::SimpleValueType) nVT;
+        if (SVT.getVectorElementType() == EltVT
+            && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
           TransformToType[i] = SVT;
           RegisterTypeForVT[i] = SVT;
           NumRegistersForVT[i] = 1;
@@ -1123,27 +1136,34 @@ void TargetLoweringBase::computeRegisterProperties() {
           break;
         }
       }
-      if (IsLegalWiderType) continue;
+      if (IsLegalWiderType)
+        break;
     }
-
-    MVT IntermediateVT;
-    MVT RegisterVT;
-    unsigned NumIntermediates;
-    NumRegistersForVT[i] =
-      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
-                                RegisterVT, this);
-    RegisterTypeForVT[i] = RegisterVT;
-
-    MVT NVT = VT.getPow2VectorType();
-    if (NVT == VT) {
-      // Type is already a power of 2.  The default action is to split.
-      TransformToType[i] = MVT::Other;
-      unsigned NumElts = VT.getVectorNumElements();
-      ValueTypeActions.setTypeAction(VT,
-            NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
-    } else {
-      TransformToType[i] = NVT;
-      ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+    case TypeSplitVector:
+    case TypeScalarizeVector: {
+      MVT IntermediateVT;
+      MVT RegisterVT;
+      unsigned NumIntermediates;
+      NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+          NumIntermediates, RegisterVT, this);
+      RegisterTypeForVT[i] = RegisterVT;
+
+      MVT NVT = VT.getPow2VectorType();
+      if (NVT == VT) {
+        // Type is already a power of 2.  The default action is to split.
+        TransformToType[i] = MVT::Other;
+        if (PreferredAction == TypeScalarizeVector)
+          ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+        else
+          ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+      } else {
+        TransformToType[i] = NVT;
+        ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+      }
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown vector legalization action!");
     }
   }
 
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index dda2259..03f4a51 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -48,16 +48,12 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
     const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
     MachineModuleInfo *MMI) const {
   unsigned Encoding = getPersonalityEncoding();
-  switch (Encoding & 0x70) {
-  default:
-    report_fatal_error("We do not support this DWARF encoding yet!");
-  case dwarf::DW_EH_PE_absptr:
-    return TM.getSymbol(GV, Mang);
-  case dwarf::DW_EH_PE_pcrel: {
+  if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
     return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
                                           TM.getSymbol(GV, Mang)->getName());
-  }
-  }
+  if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+    return TM.getSymbol(GV, Mang);
+  report_fatal_error("We do not support this DWARF encoding yet!");
 }
 
 void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
@@ -196,6 +192,18 @@ getELFSectionFlags(SectionKind K) {
   return Flags;
 }
 
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+  const Comdat *C = GV->getComdat();
+  if (!C)
+    return nullptr;
+
+  if (C->getSelectionKind() != Comdat::Any)
+    report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+                       C->getName() + "' cannot be lowered.");
+
+  return C;
+}
+
 const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
     const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
     const TargetMachine &TM) const {
@@ -204,14 +212,20 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
   // Infer section flags from the section name if we can.
   Kind = getELFKindForNamedSection(SectionName, Kind);
 
+  StringRef Group = "";
+  unsigned Flags = getELFSectionFlags(Kind);
+  if (const Comdat *C = getELFComdat(GV)) {
+    Group = C->getName();
+    Flags |= ELF::SHF_GROUP;
+  }
   return getContext().getELFSection(SectionName,
-                                    getELFSectionType(SectionName, Kind),
-                                    getELFSectionFlags(Kind), Kind);
+                                    getELFSectionType(SectionName, Kind), Flags,
+                                    Kind, /*EntrySize=*/0, Group);
 }
 
 /// getSectionPrefixForGlobal - Return the section prefix name used by options
 /// FunctionsSections and DataSections.
-static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
   if (Kind.isText())                 return ".text.";
   if (Kind.isReadOnly())             return ".rodata.";
   if (Kind.isBSS())                  return ".bss.";
@@ -228,7 +242,6 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) {
   return ".data.rel.ro.";
 }
 
-
 const MCSection *TargetLoweringObjectFileELF::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler &Mang, const TargetMachine &TM) const {
@@ -242,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 
   // If this global is linkonce/weak and the target handles this by emitting it
   // into a 'uniqued' section name, create and return the section now.
-  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+  if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
       !Kind.isCommon()) {
-    const char *Prefix;
-    Prefix = getSectionPrefixForGlobal(Kind);
+    StringRef Prefix = getSectionPrefixForGlobal(Kind);
 
-    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    SmallString<128> Name(Prefix);
     TM.getNameWithPrefix(Name, GV, Mang, true);
 
     StringRef Group = "";
     unsigned Flags = getELFSectionFlags(Kind);
-    if (GV->isWeakForLinker()) {
-      Group = Name.substr(strlen(Prefix));
+    if (GV->isWeakForLinker() || GV->hasComdat()) {
+      if (const Comdat *C = getELFComdat(GV))
+        Group = C->getName();
+      else
+        Group = Name.substr(Prefix.size());
       Flags |= ELF::SHF_GROUP;
     }
 
@@ -340,7 +355,7 @@ getSectionForConstant(SectionKind Kind) const {
 }
 
 const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
-    unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+    unsigned Priority, const MCSymbol *KeySym) const {
   // The default scheme is .ctor / .dtor, so we have to invert the priority
   // numbering.
   if (Priority == 65535)
@@ -360,7 +375,7 @@ const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
 }
 
 const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
-    unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+    unsigned Priority, const MCSymbol *KeySym) const {
   // The default scheme is .ctor / .dtor, so we have to invert the priority
   // numbering.
   if (Priority == 65535)
@@ -487,6 +502,15 @@ emitModuleFlags(MCStreamer &Streamer,
   Streamer.AddBlankLine();
 }
 
+static void checkMachOComdat(const GlobalValue *GV) {
+  const Comdat *C = GV->getComdat();
+  if (!C)
+    return;
+
+  report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() +
+                     "' cannot be lowered.");
+}
+
 const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
     const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
     const TargetMachine &TM) const {
@@ -494,6 +518,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
   StringRef Segment, Section;
   unsigned TAA = 0, StubSize = 0;
   bool TAAParsed;
+
+  checkMachOComdat(GV);
+
   std::string ErrorCode =
     MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
                                           TAA, TAAParsed, StubSize);
@@ -564,6 +591,7 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler &Mang, const TargetMachine &TM) const {
+  checkMachOComdat(GV);
 
   // Handle thread local data.
   if (Kind.isThreadBSS()) return TLSBSSSection;
@@ -732,6 +760,50 @@ getCOFFSectionFlags(SectionKind K) {
   return Flags;
 }
 
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+  const Comdat *C = GV->getComdat();
+  assert(C && "expected GV to have a Comdat!");
+
+  StringRef ComdatGVName = C->getName();
+  const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+  if (!ComdatGV)
+    report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+                       "' does not exist.");
+
+  if (ComdatGV->getComdat() != C)
+    report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+                       "' is not a key for it's COMDAT.");
+
+  return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+  if (const Comdat *C = GV->getComdat()) {
+    const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+    if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+      ComdatKey = GA->getBaseObject();
+    if (ComdatKey == GV) {
+      switch (C->getSelectionKind()) {
+      case Comdat::Any:
+        return COFF::IMAGE_COMDAT_SELECT_ANY;
+      case Comdat::ExactMatch:
+        return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+      case Comdat::Largest:
+        return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+      case Comdat::NoDuplicates:
+        return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+      case Comdat::SameSize:
+        return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+      }
+    } else {
+      return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+    }
+  } else if (GV->isWeakForLinker()) {
+    return COFF::IMAGE_COMDAT_SELECT_ANY;
+  }
+  return 0;
+}
+
 const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
     const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
     const TargetMachine &TM) const {
@@ -739,11 +811,21 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
   unsigned Characteristics = getCOFFSectionFlags(Kind);
   StringRef Name = GV->getSection();
   StringRef COMDATSymName = "";
-  if (GV->isWeakForLinker()) {
-    Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
-    Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-    MCSymbol *Sym = TM.getSymbol(GV, Mang);
-    COMDATSymName = Sym->getName();
+  if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+    Selection = getSelectionForCOFF(GV);
+    const GlobalValue *ComdatGV;
+    if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+      ComdatGV = getComdatGVForCOFF(GV);
+    else
+      ComdatGV = GV;
+
+    if (!ComdatGV->hasPrivateLinkage()) {
+      MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+      COMDATSymName = Sym->getName();
+      Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+    } else {
+      Selection = 0;
+    }
   }
   return getContext().getCOFFSection(Name,
                                      Characteristics,
@@ -780,17 +862,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   // into a 'uniqued' section name, create and return the section now.
   // Section names depend on the name of the symbol which is not feasible if the
   // symbol has private linkage.
-  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
-      !GV->hasPrivateLinkage() && !Kind.isCommon()) {
+  if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
+      !Kind.isCommon()) {
     const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
     unsigned Characteristics = getCOFFSectionFlags(Kind);
 
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-    MCSymbol *Sym = TM.getSymbol(GV, Mang);
-    return getContext().getCOFFSection(
-        Name, Characteristics, Kind, Sym->getName(),
-        GV->isWeakForLinker() ? COFF::IMAGE_COMDAT_SELECT_ANY
-                              : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES);
+    int Selection = getSelectionForCOFF(GV);
+    if (!Selection)
+      Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+    const GlobalValue *ComdatGV;
+    if (GV->hasComdat())
+      ComdatGV = getComdatGVForCOFF(GV);
+    else
+      ComdatGV = GV;
+
+    if (!ComdatGV->hasPrivateLinkage()) {
+      MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+      StringRef COMDATSymName = Sym->getName();
+      return getContext().getCOFFSection(Name, Characteristics, Kind,
+                                         COMDATSymName, Selection);
+    }
   }
 
   if (Kind.isText())
@@ -868,8 +960,7 @@ emitModuleFlags(MCStreamer &Streamer,
 
 static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
                                                   const MCSection *Sec,
-                                                  const MCSymbol *KeySym,
-                                                  const MCSection *KeySec) {
+                                                  const MCSymbol *KeySym) {
   // Return the normal section if we don't have to be associative.
   if (!KeySym)
     return Sec;
@@ -877,20 +968,19 @@ static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
   // Make an associative section with the same name and kind as the normal
   // section.
   const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
-  const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec);
   unsigned Characteristics =
       SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
   return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
                             SecCOFF->getKind(), KeySym->getName(),
-                            COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF);
+                            COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
 }
 
 const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
-    unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
-  return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec);
+    unsigned Priority, const MCSymbol *KeySym) const {
+  return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
 }
 
 const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
-    unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
-  return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec);
+    unsigned Priority, const MCSymbol *KeySym) const {
+  return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
 }