diff options
Diffstat (limited to 'lib/Target/ARM')
90 files changed, 3595 insertions, 4423 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 92eaf9e..387f1f6 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -34,6 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <map> #include <set> using namespace llvm; @@ -676,8 +678,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 55df29c..02db53a 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_H -#define TARGET_ARM_H +#ifndef LLVM_LIB_TARGET_ARM_ARM_H +#define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" @@ -23,7 +23,6 @@ class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class ImmutablePass; -class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; @@ -31,10 +30,6 @@ class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); - -FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE); - FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 25385a6..80b976b 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -228,6 +228,15 @@ def 
ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", FeatureAvoidPartialCPSR, FeatureTrustZone, FeatureVirtualization]>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureVFP4, + FeatureHWDiv, FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureTrustZone]>; + def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", [FeatureHWDiv, FeatureHWDivARM, @@ -351,12 +360,8 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, + FeatureDSPThumb2, FeatureHasRAS, FeatureMP, FeatureAClass]>; -def : ProcessorModel<"cortex-a9-mp", CortexA9Model, - [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; // FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, @@ -370,6 +375,12 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: A17 has currently the same Schedule model as A9 +def : ProcessorModel<"cortex-a17", CortexA9Model, + [ProcA17, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS, FeatureAClass]>; + // FIXME: krait has currently the same Schedule model as A9 def : ProcessorModel<"krait", CortexA9Model, [ProcKrait, HasV7Ops, @@ -396,6 +407,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m7", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv, FeatureDSPThumb2, + FeatureT2XtPk, FeatureFPARMv8, + FeatureD16, FeatureMClass]>; + // Swift uArch Processors. 
def : ProcessorModel<"swift", SwiftModel, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 28d2610..695fd4d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -76,7 +76,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); + uint64_t Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -136,7 +137,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, assert(!MO.getSubReg() && "Subregs should be eliminated!"); if(ARM::GPRPairRegClass.contains(Reg)) { const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); Reg = TRI->getSubReg(Reg, ARM::gsub_0); } O << ARMInstPrinter::getRegisterName(Reg); @@ -182,7 +183,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; @@ -191,7 +192,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -229,7 +230,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, 
unsigned OpNum, case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { unsigned Reg = MI->getOperand(OpNum).getReg(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { @@ -261,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // inline asm statement. O << "{"; if (ARM::GPRPairRegClass.contains(RegBegin)) { - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); @@ -317,7 +318,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? ARM::gsub_0 : ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); @@ -343,7 +344,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); if (!ARM::QPRRegClass.contains(Reg)) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? 
ARM::dsub_0 : ARM::dsub_1); O << ARMInstPrinter::getRegisterName(SubReg); @@ -358,7 +359,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MO.isReg()) return true; const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned Reg = MO.getReg(); if(!ARM::GPRPairRegClass.contains(Reg)) return false; @@ -478,6 +479,9 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); + + if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb()) + OutStreamer.EmitAssemblerFlag(MCAF_Code16); } static void @@ -558,7 +562,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); for (auto &stub: Stubs) { OutStreamer.EmitLabel(stub.first); @@ -663,7 +667,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::AllowNeonARMv8); } else { if (Subtarget->hasFPARMv8()) - ATS.emitFPU(ARM::FP_ARMV8); + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); else if (Subtarget->hasVFP4()) ATS.emitFPU(Subtarget->hasD16() ? 
ARM::VFPV4_D16 : ARM::VFPV4); else if (Subtarget->hasVFP3()) @@ -700,6 +706,13 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEE754); + if (Subtarget->allowsUnalignedMem()) + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + else + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); @@ -757,6 +770,17 @@ void ARMAsmPrinter::emitAttributes() { } } + // TODO: We currently only support either reserving the register, or treating + // it as another callee-saved register, but not as SB or a TLS pointer; It + // would instead be nicer to push this from the frontend as metadata, as we do + // for the wchar and enum size tags + if (Subtarget->isR9Reserved()) + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9Reserved); + else + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9IsGPR); + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); @@ -834,8 +858,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getDataLayout(); - int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + int Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1013,7 +1038,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); const MachineFunction &MF 
= *MI->getParent()->getParent(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -1151,7 +1176,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1567,6 +1592,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitJumpTable(MI); return; } + case ARM::SPACE: + OutStreamer.EmitZeros(MI->getOperand(1).getImm()); + return; case ARM::TRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. 
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7c103c6..5ff20ce 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMASMPRINTER_H -#define ARMASMPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H +#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H #include "ARMSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0288db9..7a315c4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -108,7 +108,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { const InstrItineraryData *II = - &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); + static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); @@ -518,6 +518,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +static bool isCPSRDefined(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef()) + return true; + return false; +} + +static bool isEligibleForITBlock(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return true; + case ARM::tADC: // ADC (register) T1 + case ARM::tADDi3: // ADD (immediate) T1 + case ARM::tADDi8: // ADD (immediate) T2 + case ARM::tADDrr: // ADD (register) T1 + case ARM::tAND: // AND (register) T1 + case ARM::tASRri: // ASR (immediate) T1 + case ARM::tASRrr: // ASR (register) T1 + case ARM::tBIC: // BIC (register) T1 + case ARM::tEOR: // EOR (register) T1 + case ARM::tLSLri: // LSL (immediate) T1 + case ARM::tLSLrr: // LSL (register) T1 + case 
ARM::tLSRri: // LSR (immediate) T1 + case ARM::tLSRrr: // LSR (register) T1 + case ARM::tMUL: // MUL T1 + case ARM::tMVN: // MVN (register) T1 + case ARM::tORR: // ORR (register) T1 + case ARM::tROR: // ROR (register) T1 + case ARM::tRSB: // RSB (immediate) T1 + case ARM::tSBC: // SBC (register) T1 + case ARM::tSUBi3: // SUB (immediate) T1 + case ARM::tSUBi8: // SUB (immediate) T2 + case ARM::tSUBrr: // SUB (register) T1 + return !isCPSRDefined(MI); + } +} + /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. @@ -525,6 +561,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; + if (!isEligibleForITBlock(MI)) + return false; + ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); @@ -555,16 +594,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { } } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. -LLVM_ATTRIBUTE_NOINLINE -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI); -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) { - assert(JTI < JT.size()); - return JT[JTI].MBBs.size(); -} - /// GetInstSize - Return the size of the specified MachineInstr. /// unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { @@ -637,7 +666,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // bytes, we can use 16-bit entries instead. Then there won't be an // alignment issue. unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; - unsigned NumEntries = getNumJTEntries(JT, JTI); + unsigned NumEntries = JT[JTI].MBBs.size(); if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) // Make sure the instruction that follows TBB is 2-byte aligned. 
// FIXME: Constant island pass should insert an "ALIGN" instruction @@ -645,6 +674,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ++NumEntries; return NumEntries * EntrySize + InstSize; } + case ARM::SPACE: + return MI->getOperand(1).getImm(); } } @@ -659,6 +690,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { return Size; } +void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) + : ARM::MRS; + + MachineInstrBuilder MIB = + BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); + + // There is only 1 A/R class MRS instruction, and it always refers to + // APSR. However, there are lots of other possibilities on M-class cores. + if (Subtarget.isMClass()) + MIB.addImm(0x800); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); +} + +void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? 
ARM::t2MSR_M : ARM::t2MSR_AR) + : ARM::MSR; + + MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); + + if (Subtarget.isMClass()) + MIB.addImm(0x800); + else + MIB.addImm(8); + + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -682,7 +756,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -742,6 +816,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BeginIdx = ARM::dsub_0; SubRegs = 4; Spacing = 2; + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + Opc = ARM::VMOVS; + BeginIdx = ARM::ssub_0; + SubRegs = 2; + } else if (SrcReg == ARM::CPSR) { + copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); + return; + } else if (DestReg == ARM::CPSR) { + copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); + return; } assert(Opc && "Impossible reg-to-reg copy"); @@ -1174,12 +1258,26 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } -bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ +bool +ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + Reloc::Model RM = MF.getTarget().getRelocationModel(); + + if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { + assert(getSubtarget().getTargetTriple().getObjectFormat() == + Triple::MachO && + "LOAD_STACK_GUARD currently 
supported only for MachO."); + expandLoadStackGuard(MI, RM); + MI->getParent()->erase(MI); + return true; + } + // This hook gets to expand COPY instructions before they become // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || + Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -2832,7 +2930,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, // FIXME: The current MachineInstr design does not support relying on machine // mem operands to determine the width of a memory access. Instead, we expect // the target to provide this information based on the instruction opcode and -// operands. However, using MachineMemOperand is a the best solution now for +// operands. However, using MachineMemOperand is the best solution now for // two reasons: // // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI @@ -3933,6 +4031,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, return true; } +// LoadStackGuard has so far only been implemented for MachO. Different code +// sequence is needed for other targets. 
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, + unsigned LoadOpc, + Reloc::Model RM) const { + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + MachineInstrBuilder MIB; + + BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + + if (Subtarget.GVIsIndirectSymbol(GV, RM)) { + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()-> + getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + AddDefaultPred(MIB); + } + + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -4361,29 +4491,6 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } -void ARMBaseInstrInfo::getUnconditionalBranch( - MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { - if (Subtarget.isThumb()) - Branch.setOpcode(ARM::tB); - else if (Subtarget.isThumb2()) - Branch.setOpcode(ARM::t2B); - else - Branch.setOpcode(ARM::Bcc); - - Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); - Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); - Branch.addOperand(MCOperand::CreateReg(0)); -} - -void ARMBaseInstrInfo::getTrap(MCInst &MI) const { - if (Subtarget.isThumb()) - MI.setOpcode(ARM::tTRAP); - else if (Subtarget.useNaClTrap()) - MI.setOpcode(ARM::TRAPNaCl); - else - MI.setOpcode(ARM::TRAP); -} - bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() 
& ARM::HasV6T2Ops) != 0; } @@ -4401,3 +4508,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { return false; } + +bool ARMBaseInstrInfo::getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVDRR: + // dX = VMOVDRR rY, rZ + // is the same as: + // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 + // Populate the InputRegs accordingly. + // rY + const MachineOperand *MOReg = &MI.getOperand(1); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + // rZ + MOReg = &MI.getOperand(2); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getExtractSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVRRD: + // rX, rY = VMOVRRD dZ + // is the same as: + // rX = EXTRACT_SUBREG dZ, ssub_0 + // rY = EXTRACT_SUBREG dZ, ssub_1 + const MachineOperand &MOReg = MI.getOperand(2); + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = DefIdx == 0 ? 
ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getInsertSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VSETLNi32: + // dX = VSETLNi32 dY, rZ, imm + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOIndex = MI.getOperand(3); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b8d6758..0ae291b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINSTRUCTIONINFO_H -#define ARMBASEINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER @@ -34,6 +35,57 @@ protected: // Can be only subclassed. 
explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, unsigned LoadOpc, + Reloc::Model RM) const; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as <Reg:SubReg, SubIdx>. + /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// two elements: + /// - vreg1:sub1, sub0 + /// - vreg2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). + bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: + /// - vreg1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const override; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: + /// - BaseReg: vreg0:sub0 + /// - InsertedReg: vreg1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). 
+ bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const override; + public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; @@ -104,6 +156,13 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const override; + void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; @@ -230,12 +289,6 @@ public: void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, const TargetRegisterInfo *TRI) const override; - void - getUnconditionalBranch(MCInst &Branch, - const MCSymbolRefExpr *BranchTarget) const override; - - void getTrap(MCInst &MI) const override; - /// Get the number of addresses by LDM or VLDM or zero for unknown. unsigned getNumLDMAddresses(const MachineInstr *MI) const; @@ -286,6 +339,9 @@ private: bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const override; + virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const = 0; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cdd91c7..6dc0493 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,6 +38,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" @@ -121,7 +123,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -180,14 +182,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return nullptr; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. 
return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (RC->getID()) { default: @@ -309,7 +311,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -354,7 +356,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. 
@@ -365,7 +370,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -385,7 +393,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) return FramePtr; @@ -402,7 +410,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -415,11 +423,6 @@ emitLoadConstPool(MachineBasicBlock &MBB, .setMIFlags(MIFlags); } -bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const { - // The native linux build hits a downstream codegen bug when this is enabled. - return STI.isTargetDarwin(); -} - bool ARMBaseRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -529,7 +532,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. 
MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -582,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -591,15 +594,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -607,7 +610,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Off = Offset; // 
ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -706,9 +709,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); - const ARMFrameLowering *TFI = - static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -775,3 +778,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. 
+ if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing; + // unfortunately, we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo<ARMFunctionInfo>(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. + unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +} diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 91df565..e9bc412 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEREGISTERINFO_H -#define ARMBASEREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -174,8 +174,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods...
- bool mayOverrideLocalAssignment() const override; - bool requiresRegisterScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; @@ -187,6 +185,14 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index dc41c1c..bd07236 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMCALLINGCONV_H -#define ARMCALLINGCONV_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H +#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H #include "ARM.h" #include "ARMBaseInstrInfo.h" @@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 8); + assert(PendingHAMembers.size() < 4); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && "Homogeneous aggregates must have 
between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct @@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, const uint16_t *RegList; unsigned NumRegs; switch (LocVT.SimpleTy) { - case MVT::i32: case MVT::f32: RegList = SRegList; NumRegs = 16; @@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = Size; - - if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { - // Vectors are always aligned to 8 bytes. If we've seen an i32 here - // it's because it's been split from a larger type, also with align 8. - Align = 8; - } + unsigned Align = std::min(Size, 8U); for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); - - // Only the first member needs to be aligned. - Align = 1; } // All pending members have now been allocated diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp deleted file mode 100644 index 5fb6ebf..0000000 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ /dev/null @@ -1,1909 +0,0 @@ -//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the pass that transforms the ARM machine instructions into -// relocatable machine code. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "ARMBaseInstrInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "ARMSubtarget.h" -#include "ARMTargetMachine.h" -#include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/PassManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#ifndef NDEBUG -#include <iomanip> -#endif -using namespace llvm; - -#define DEBUG_TYPE "jit" - -STATISTIC(NumEmitted, "Number of machine instructions emitted"); - -namespace { - - class ARMCodeEmitter : public MachineFunctionPass { - ARMJITInfo *JTI; - const ARMBaseInstrInfo *II; - const DataLayout *TD; - const ARMSubtarget *Subtarget; - TargetMachine &TM; - JITCodeEmitter &MCE; - MachineModuleInfo *MMI; - const std::vector<MachineConstantPoolEntry> *MCPEs; - const std::vector<MachineJumpTableEntry> *MJTEs; - bool IsPIC; - bool IsThumb; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - static char ID; - public: - ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(nullptr), - II((const ARMBaseInstrInfo *)tm.getInstrInfo()), - TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(nullptr), MJTEs(nullptr), - IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} - - /// getBinaryCodeForInstr - This function, generated 
by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "ARM Machine Code Emitter"; - } - - void emitInstruction(const MachineInstr &MI); - - private: - - void emitWordLE(unsigned Binary); - void emitDWordLE(uint64_t Binary); - void emitConstPoolInstruction(const MachineInstr &MI); - void emitMOVi32immInstruction(const MachineInstr &MI); - void emitMOVi2piecesInstruction(const MachineInstr &MI); - void emitLEApcrelJTInstruction(const MachineInstr &MI); - void emitPseudoMoveInstruction(const MachineInstr &MI); - void addPCLabel(unsigned LabelID); - void emitPseudoInstruction(const MachineInstr &MI); - unsigned getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx); - - unsigned getMachineSoImmOpValue(unsigned SoImm); - unsigned getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const; - - void emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn = 0); - - void emitLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitMulFrmInstruction(const MachineInstr &MI); - - void emitExtendInstruction(const MachineInstr &MI); - - void emitMiscArithInstruction(const MachineInstr &MI); - - void emitSaturateInstruction(const MachineInstr &MI); - - void emitBranchInstruction(const MachineInstr &MI); - - void emitInlineJumpTable(unsigned JTIndex); - - void emitMiscBranchInstruction(const MachineInstr &MI); - - void emitVFPArithInstruction(const MachineInstr &MI); - - void emitVFPConversionInstruction(const 
MachineInstr &MI); - - void emitVFPLoadStoreInstruction(const MachineInstr &MI); - - void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitNEONLaneInstruction(const MachineInstr &MI); - void emitNEONDupInstruction(const MachineInstr &MI); - void emitNEON1RegModImmInstruction(const MachineInstr &MI); - void emitNEON2RegInstruction(const MachineInstr &MI); - void emitNEON3RegInstruction(const MachineInstr &MI); - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; - unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { - return getMachineOpValue(MI, MI.getOperand(OpIdx)); - } - - // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the - // TableGen'erated getBinaryCodeForInstr() function to encode any - // operand values, instead querying getMachineOpValue() directly for - // each operand it needs to encode. Thus, any of the new encoder - // helper functions can simply return 0 as the values the return - // are already handled elsewhere. They are placeholders to allow this - // encoder to continue to function until the MC encoder is sufficiently - // far along that this one can be eliminated entirely. 
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) - const { return 0; } - unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) - const { return 0; } - unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegImmOpValue(const 
MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI, - unsigned Op) - const { return 0; } - unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0; } - - unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) - const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - uint32_t Binary; 
- Binary = Imm12 & 0xfff; - if (Imm12 >= 0) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - - unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const { - return 0; - } - - uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - - // Special value for #-0 - if (Imm12 == INT32_MIN) - Imm12 = 0; - - // Immediate is always encoded as positive. The 'U' bit controls add vs - // sub. 
- bool isAdd = true; - if (Imm12 < 0) { - Imm12 = -Imm12; - isAdd = false; - } - - uint32_t Binary = Imm12 & 0xfff; - if (isAdd) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - - /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return - /// zero. - unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, - unsigned Reloc); - - /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. - /// - unsigned getShiftOp(unsigned Imm) const ; - - /// Routines that handle operands which add machine relocations which are - /// fixed up by the relocation stage. 
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV = 0) const; - void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; - void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; - void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, - intptr_t JTBase = 0) const; - unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; - }; -} - -char ARMCodeEmitter::ID = 0; - -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM -/// code to the specified MCE object. 
-FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE) { - return new ARMCodeEmitter(TM, JCE); -} - -bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget()); - - assert((Target.getRelocationModel() != Reloc::Default || - Target.getRelocationModel() != Reloc::Static) && - "JIT relocation model must be set to static or default!"); - - JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); - II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); - TD = Target.getDataLayout(); - - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = nullptr; - if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); - IsPIC = TM.getRelocationModel() == Reloc::PIC_; - IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); - JTI->Initialize(MF, IsPIC); - MMI = &getAnalysis<MachineModuleInfo>(); - MCE.setModuleInfo(MMI); - - do { - DEBUG(errs() << "JITTing function '" - << MF.getName() << "'\n"); - MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); - MBB != E; ++MBB) { - MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) - emitInstruction(*I); - } - } while (MCE.finishFunction(MF)); - - return false; -} - -/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. -/// -unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { - switch (ARM_AM::getAM2ShiftOpc(Imm)) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::asr: return 2; - case ARM_AM::lsl: return 0; - case ARM_AM::lsr: return 1; - case ARM_AM::ror: - case ARM_AM::rrx: return 3; - } -} - -/// getMovi32Value - Return binary encoding of operand for movw/movt. If the -/// machine operand requires relocation, record the relocation and return zero. 
-unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, - const MachineOperand &MO, - unsigned Reloc) { - assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) - && "Relocation to this function should be for movt or movw"); - - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), Reloc, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), Reloc); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), Reloc); - else { -#ifndef NDEBUG - errs() << MO; -#endif - llvm_unreachable("Unsupported operand type for movw/movt"); - } - return 0; -} - -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. -unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { - if (MO.isReg()) - return II->getRegisterInfo().getEncodingValue(MO.getReg()); - else if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); - else if (MO.isCPI()) { - const MCInstrDesc &MCID = MI.getDesc(); - // For VFP load, the immediate offset is multiplied by 4. - unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) - ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; - emitConstPoolAddress(MO.getIndex(), Reloc); - } else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); - else - llvm_unreachable("Unable to encode MachineOperand!"); - return 0; -} - -/// emitGlobalAddress - Emit the specified address to the code stream. 
-/// -void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) const { - MachineRelocation MR = Indirect - ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), - ACPV, MayNeedFarStub) - : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), ACPV, - MayNeedFarStub); - MCE.addRelocation(MR); -} - -/// emitExternalSymbolAddress - Arrange for the address of an external symbol to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter:: -emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES)); -} - -/// emitConstPoolAddress - Arrange for the address of an constant pool -/// to be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { - // Tell JIT emitter we'll resolve the address. - MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - Reloc, CPI, 0, true)); -} - -/// emitJumpTableAddress - Arrange for the address of a jump table to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter:: -emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), - Reloc, JTIndex, 0, true)); -} - -/// emitMachineBasicBlock - Emit the specified address basic block. 
-void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, - intptr_t JTBase) const { - MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), - Reloc, BB, JTBase)); -} - -void ARMCodeEmitter::emitWordLE(unsigned Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitWordLE(Binary); -} - -void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitDWordLE(Binary); -} - -void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { - DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); - - MCE.processDebugLoc(MI.getDebugLoc(), true); - - ++NumEmitted; // Keep track of the # of mi's emitted - switch (MI.getDesc().TSFlags & ARMII::FormMask) { - default: { - llvm_unreachable("Unhandled instruction encoding format!"); - } - case ARMII::MiscFrm: - if (MI.getOpcode() == ARM::LEApcrelJT) { - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - } - llvm_unreachable("Unhandled instruction encoding!"); - case ARMII::Pseudo: - emitPseudoInstruction(MI); - break; - case ARMII::DPFrm: - case ARMII::DPSoRegFrm: - emitDataProcessingInstruction(MI); - break; - case ARMII::LdFrm: - case ARMII::StFrm: - emitLoadStoreInstruction(MI); - break; - case ARMII::LdMiscFrm: - case ARMII::StMiscFrm: - emitMiscLoadStoreInstruction(MI); - break; - case ARMII::LdStMulFrm: - emitLoadStoreMultipleInstruction(MI); - break; - case ARMII::MulFrm: - emitMulFrmInstruction(MI); - break; - case ARMII::ExtFrm: - emitExtendInstruction(MI); - break; - case ARMII::ArithMiscFrm: - emitMiscArithInstruction(MI); - break; - case ARMII::SatFrm: - emitSaturateInstruction(MI); - break; - case ARMII::BrFrm: - emitBranchInstruction(MI); - break; - case ARMII::BrMiscFrm: - emitMiscBranchInstruction(MI); - break; - // VFP instructions. 
- case ARMII::VFPUnaryFrm: - case ARMII::VFPBinaryFrm: - emitVFPArithInstruction(MI); - break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - emitVFPConversionInstruction(MI); - break; - case ARMII::VFPLdStFrm: - emitVFPLoadStoreInstruction(MI); - break; - case ARMII::VFPLdStMulFrm: - emitVFPLoadStoreMultipleInstruction(MI); - break; - - // NEON instructions. - case ARMII::NGetLnFrm: - case ARMII::NSetLnFrm: - emitNEONLaneInstruction(MI); - break; - case ARMII::NDupFrm: - emitNEONDupInstruction(MI); - break; - case ARMII::N1RegModImmFrm: - emitNEON1RegModImmInstruction(MI); - break; - case ARMII::N2RegFrm: - emitNEON2RegInstruction(MI); - break; - case ARMII::N3RegFrm: - emitNEON3RegInstruction(MI); - break; - } - MCE.processDebugLoc(MI.getDebugLoc(), false); -} - -void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { - unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. - unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. - const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; - - // Remember the CONSTPOOL_ENTRY address for later relocation. - JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); - - // Emit constpool island entry. In most cases, the actual values will be - // resolved and relocated after code emission. 
- if (MCPE.isMachineConstantPoolEntry()) { - ARMConstantPoolValue *ACPV = - static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); - - DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); - - assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); - if (GV) { - Reloc::Model RelocM = TM.getRelocationModel(); - emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, - isa<Function>(GV), - Subtarget->GVIsIndirectSymbol(GV, RelocM), - (intptr_t)ACPV); - } else { - const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); - emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); - } - emitWordLE(0); - } else { - const Constant *CV = MCPE.Val.ConstVal; - - DEBUG({ - errs() << " ** Constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " "; - if (const Function *F = dyn_cast<Function>(CV)) - errs() << F->getName(); - else - errs() << *CV; - errs() << '\n'; - }); - - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); - emitWordLE(0); - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - uint32_t Val = uint32_t(*CI->getValue().getRawData()); - emitWordLE(Val); - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - if (CFP->getType()->isFloatTy()) - emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else if (CFP->getType()->isDoubleTy()) - emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } -} - -void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - - // Emit the 'movw' instruction. 
- unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 - - unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm12 - Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 - Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 - emitWordLE(Binary); - - unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; - // Emit the 'movt' instruction. - Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm1, same as movw above. - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) && - "Not a valid so_imm value!"); - unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); - unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); - - // Emit the 'mov' instruction. - unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V1); - emitWordLE(Binary); - - // Now the 'orr' instruction. - Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 - - // Set the conditional execution predicate. 
- Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V2); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { - // It's basically add r, pc, (LJTI - $+8) - - const MCInstrDesc &MCID = MI.getDesc(); - - // Emit the 'add' instruction. - unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode Rd. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode Rn which is PC. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; - - // Encode the displacement. - Binary |= 1 << ARMII::I_BitShift; - emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) - Binary |= 1 << ARMII::S_BitShift; - - // Encode register def if there is one. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode the shift operation. 
- switch (Opcode) { - default: break; - case ARM::RRX: - // rrx - Binary |= 0x6 << 4; - break; - case ARM::MOVsrl_flag: - // lsr #1 - Binary |= (0x2 << 4) | (1 << 7); - break; - case ARM::MOVsra_flag: - // asr #1 - Binary |= (0x4 << 4) | (1 << 7); - break; - } - - // Encode register Rm. - Binary |= getMachineOpValue(MI, 1); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::addPCLabel(unsigned LabelID) { - DEBUG(errs() << " ** LPC" << LabelID << " @ " - << (void*)MCE.getCurrentPCValue() << '\n'); - JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); -} - -void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - switch (Opcode) { - default: - llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: { - // First emit mov lr, pc - unsigned Binary = 0x01a0e00f; - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - emitWordLE(Binary); - - // and then emit the branch. - emitMiscBranchInstruction(MI); - break; - } - case TargetOpcode::INLINEASM: { - // We allow inline assembler nodes with empty bodies - they can - // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) { - report_fatal_error("JIT does not support inline asm!"); - } - break; - } - case TargetOpcode::CFI_INSTRUCTION: - break; - case TargetOpcode::EH_LABEL: - MCE.emitLabel(MI.getOperand(0).getMCSymbol()); - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - // Do nothing. - break; - case ARM::CONSTPOOL_ENTRY: - emitConstPoolInstruction(MI); - break; - case ARM::PICADD: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // PICADD is just an add instruction that implicitly read pc. 
- emitDataProcessingInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDR: - case ARM::PICLDRB: - case ARM::PICSTR: - case ARM::PICSTRB: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitLoadStoreInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDRH: - case ARM::PICLDRSH: - case ARM::PICLDRSB: - case ARM::PICSTRH: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitMiscLoadStoreInstruction(MI, ARM::PC); - break; - } - - case ARM::MOVi32imm: - // Two instructions to materialize a constant. - if (Subtarget->hasV6T2Ops()) - emitMOVi32immInstruction(MI); - else - emitMOVi2piecesInstruction(MI); - break; - - case ARM::LEApcrelJT: - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - case ARM::RRX: - case ARM::MOVsrl_flag: - case ARM::MOVsra_flag: - emitPseudoMoveInstruction(MI); - break; - } -} - -unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx) { - unsigned Binary = getMachineOpValue(MI, MO); - - const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); - const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); - ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); - - // Encode the shift opcode. - unsigned SBits = 0; - unsigned Rs = MO1.getReg(); - if (Rs) { - // Set shift operand (bit[7:4]). - // LSL - 0001 - // LSR - 0011 - // ASR - 0101 - // ROR - 0111 - // RRX - 0110 and bit[11:8] clear. 
- switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x1; break; - case ARM_AM::lsr: SBits = 0x3; break; - case ARM_AM::asr: SBits = 0x5; break; - case ARM_AM::ror: SBits = 0x7; break; - case ARM_AM::rrx: SBits = 0x6; break; - } - } else { - // Set shift operand (bit[6:4]). - // LSL - 000 - // LSR - 010 - // ASR - 100 - // ROR - 110 - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x0; break; - case ARM_AM::lsr: SBits = 0x2; break; - case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::ror: SBits = 0x6; break; - } - } - Binary |= SBits << 4; - if (SOpc == ARM_AM::rrx) - return Binary; - - // Encode the shift operation Rs or shift_imm (except rrx). - if (Rs) { - // Encode Rs bit[11:8]. - assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); - } - - // Encode shift_imm bit[11:7]. - return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; -} - -unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { - int SoImmVal = ARM_AM::getSOImmVal(SoImm); - assert(SoImmVal != -1 && "Not a valid so_imm value!"); - - // Encode rotate_imm. - unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) - << ARMII::SoRotImmShift; - - // Encode immed_8. - Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); - return Binary; -} - -unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const { - for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ - const MachineOperand &MO = MI.getOperand(i-1); - if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) - return 1 << ARMII::S_BitShift; - } - return 0; -} - -void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode register def if there is one. - unsigned NumDefs = MCID.getNumDefs(); - unsigned OpIdx = 0; - if (NumDefs) - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - else if (ImplicitRd) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - - if (MCID.Opcode == ARM::MOVi16) { - // Get immediate from MI. - unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movw); - // Encode imm which is the same as in emitMOVi32immInstruction(). - Binary |= Lo16 & 0xFFF; - Binary |= ((Lo16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if(MCID.Opcode == ARM::MOVTi16) { - unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movt) >> 16); - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { - uint32_t v = ~MI.getOperand(2).getImm(); - int32_t lsb = countTrailingZeros(v); - int32_t msb = (32 - countLeadingZeros(v)) - 1; - // Instr{20-16} = msb, Instr{11-7} = lsb - Binary |= (msb & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { - // Encode Rn in Instr{0-3} - Binary |= getMachineOpValue(MI, OpIdx++); - - uint32_t lsb = MI.getOperand(OpIdx++).getImm(); - uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; - - // Instr{20-16} = widthm1, Instr{11-7} = lsb - Binary |= (widthm1 & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } - - // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. 
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode first non-shifter register operand if there is one. - bool isUnary = MCID.TSFlags & ARMII::UnaryDP; - if (!isUnary) { - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else { - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; - ++OpIdx; - } - } - - // Encode shifter operand. - const MachineOperand &MO = MI.getOperand(OpIdx); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { - // Encode SoReg. - emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); - return; - } - - if (MO.isReg()) { - // Encode register Rm. - emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); - return; - } - - // Encode so_imm. - Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. - if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || - MI.getOpcode() == ARM::STRi12) { - emitWordLE(Binary); - return; - } - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - if (ImplicitRd) - // Special handling for implicit use (e.g. PC). 
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDR_PRE. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM2Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative). - Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - if (!MO2.getReg()) { // is immediate - if (ARM_AM::getAM2Offset(AM2Opc)) - // Set the value of offset_12 field - Binary |= ARM_AM::getAM2Offset(AM2Opc); - emitWordLE(Binary); - return; - } - - // Set bit I(25), because this is not in immediate encoding. - Binary |= 1 << ARMII::I_BitShift; - assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); - // Set bit[3:0] to the corresponding Rm register - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - - // If this instr is in scaled register offset/index instruction, set - // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. - if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { - Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift - Binary |= ShImm << ARMII::ShiftShift; // shift_immed - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StMiscFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Skip LDRD and STRD's second operand. - if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) - ++OpIdx; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDRH_POST. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM3Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative) - Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - - // If this instr is in register offset/index encoding, set bit[3:0] - // to the corresponding Rm register. - if (MO2.getReg()) { - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - emitWordLE(Binary); - return; - } - - // This instr is in immediate offset/index encoding, set bit 22 to 1. 
- Binary |= 1 << ARMII::AM3_I_BitShift; - if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { - // Set operands - Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH - Binary |= (ImmOffs & 0xF); // immedL - } - - emitWordLE(Binary); -} - -static unsigned getAddrModeUPBits(unsigned Mode) { - unsigned Binary = 0; - - // Set addressing mode by modifying bits U(23) and P(24) - // IA - Increment after - bit U = 1 and bit P = 0 - // IB - Increment before - bit U = 1 and bit P = 1 - // DA - Decrement after - bit U = 0 and bit P = 0 - // DB - Decrement before - bit U = 0 and bit P = 1 - switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); - case ARM_AM::da: break; - case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; - case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; - case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; - } - - return Binary; -} - -void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. 
- unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // Set registers - for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - RegNum < 16); - Binary |= 0x1 << RegNum; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // 32x32->64bit operations have two destination registers. The number - // of register definitions will tell us if that's what we're dealing with. - unsigned OpIdx = 0; - if (MCID.getNumDefs() == 2) - Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; - - // Encode Rm - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode Rs - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; - - // Many multiple instructions (e.g. MLA) have three src operands. Encode - // it as Rn (for multiply, that's in the same offset as RdLo. 
- if (MCID.getNumOperands() > OpIdx && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO1 = MI.getOperand(OpIdx++); - const MachineOperand &MO2 = MI.getOperand(OpIdx); - if (MO2.isReg()) { - // Two register operand form. - // Encode Rn. - Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, MO2); - ++OpIdx; - } else { - Binary |= getMachineOpValue(MI, MO1); - } - - // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. - if (MI.getOperand(OpIdx).isImm() && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // PKH instructions are finished at this point - if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { - emitWordLE(Binary); - return; - } - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO = MI.getOperand(OpIdx++); - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // Encode Rm and it's done. - Binary |= getMachineOpValue(MI, MO); - emitWordLE(Binary); - return; - } - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode shift_imm. - unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); - if (MCID.Opcode == ARM::PKHTB) { - assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); - if (ShiftAmt == 32) - ShiftAmt = 0; - } - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode saturate bit position. - unsigned Pos = MI.getOperand(1).getImm(); - if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) - Pos -= 1; - assert((Pos < 16 || (Pos < 32 && - MCID.Opcode != ARM::SSAT16 && - MCID.Opcode != ARM::USAT16)) && - "saturate bit position out of range"); - Binary |= Pos << 16; - - // Encode Rm - Binary |= getMachineOpValue(MI, 2); - - // Encode shift_imm. 
- if (MCID.getNumOperands() == 4) { - unsigned ShiftOp = MI.getOperand(3).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - if (Opc == ARM_AM::asr) - Binary |= (1 << 6); - unsigned ShiftAmt = MI.getOperand(3).getImm(); - if (ShiftAmt == 32 && Opc == ARM_AM::asr) - ShiftAmt = 0; - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - if (MCID.Opcode == ARM::TPsoft) { - llvm_unreachable("ARM::TPsoft FIXME"); // FIXME - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Set signed_immed_24 field - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { - // Remember the base address of the inline jump table. - uintptr_t JTBase = MCE.getCurrentPCValue(); - JTI->addJumpTableBaseAddr(JTIndex, JTBase); - DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase - << '\n'); - - // Now emit the jump table entries. - const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs; - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { - if (IsPIC) - // DestBB address - JT base. - emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); - else - // Absolute DestBB address. - emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); - emitWordLE(0); - } -} - -void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Handle jump tables. - if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { - // First emit a ldr pc, [] instruction. - emitDataProcessingInstruction(MI, ARM::PC); - - // Then emit the inline jump table. 
- unsigned JTIndex = - (MCID.Opcode == ARM::BR_JTr) - ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); - emitInlineJumpTable(JTIndex); - return; - } else if (MCID.Opcode == ARM::BR_JTm) { - // First emit a ldr pc, [] instruction. - emitLoadStoreInstruction(MI, ARM::PC); - - // Then emit the inline jump table. - emitInlineJumpTable(MI.getOperand(3).getIndex()); - return; - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) - // The return register is LR. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); - else - // otherwise, set the return register - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = II->getRegisterInfo().getEncodingValue(RegD); - if (!isSPVFP) - Binary |= RegD << ARMII::RegRdShift; - else { - Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; - Binary |= (RegD & 0x01) << ARMII::D_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = II->getRegisterInfo().getEncodingValue(RegN); - if (!isSPVFP) - Binary |= RegN << ARMII::RegRnShift; - else { - Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; - Binary |= (RegN & 0x01) << ARMII::N_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = 
II->getRegisterInfo().getEncodingValue(RegM); - if (!isSPVFP) - Binary |= RegM; - else { - Binary |= ((RegM & 0x1E) >> 1); - Binary |= (RegM & 0x01) << ARMII::M_BitShift; - } - return Binary; -} - -void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - assert((Binary & ARMII::D_BitShift) == 0 && - (Binary & ARMII::N_BitShift) == 0 && - (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // If this is a two-address operand, skip it, e.g. FMACD. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode Dn / Sn. - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) - Binary |= encodeVFPRn(MI, OpIdx++); - - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // FCMPEZD etc. has only one operand. - emitWordLE(Binary); - return; - } - - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, OpIdx); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 0); - break; - case ARMII::VFPConv4Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 0); - break; - case ARMII::VFPConv5Frm: - // Encode Dm / Sm. 
- Binary |= encodeVFPRm(MI, 0); - break; - } - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 1); - break; - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 1); - break; - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 1); - break; - } - - if (Form == ARMII::VFPConv5Frm) - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 2); - else if (Form == ARMII::VFPConv3Frm) - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 2); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // Encode address base. - const MachineOperand &Base = MI.getOperand(OpIdx++); - Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; - - // If there is a non-zero immediate offset, encode it. - if (Base.isReg()) { - const MachineOperand &Offset = MI.getOperand(OpIdx); - if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { - if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) - Binary |= 1 << ARMII::U_BitShift; - Binary |= ImmOffs; - emitWordLE(Binary); - return; - } - } - - // If immediate offset is omitted, default to +0. - Binary |= 1 << ARMII::U_BitShift; - - emitWordLE(Binary); -} - -void -ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. - unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // First register is encoded in Dd. - Binary |= encodeVFPRd(MI, OpIdx+2); - - // Count the number of registers. - unsigned NumRegs = 1; - for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - ++NumRegs; - } - // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0) - // Otherwise, it will be 0, in the case of 32-bit registers. 
- if(Binary & 0x100) - Binary |= NumRegs * 2; - else - Binary |= NumRegs; - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegD = II->getRegisterInfo().getEncodingValue(RegD); - Binary |= (RegD & 0xf) << ARMII::RegRdShift; - Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegN = II->getRegisterInfo().getEncodingValue(RegN); - Binary |= (RegN & 0xf) << ARMII::RegRnShift; - Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegM = II->getRegisterInfo().getEncodingValue(RegM); - Binary |= (RegM & 0xf); - Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; - return Binary; -} - -/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON -/// data-processing instruction to the corresponding Thumb encoding. -static unsigned convertNEONDataProcToThumb(unsigned Binary) { - assert((Binary & 0xfe000000) == 0xf2000000 && - "not an ARM NEON data-processing instruction"); - unsigned UBit = (Binary >> 24) & 1; - return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); -} - -void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; - const MCInstrDesc &MCID = MI.getDesc(); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { - RegTOpIdx = 0; - RegNOpIdx = 1; - LnOpIdx = 2; - } else { // ARMII::NSetLnFrm - RegTOpIdx = 2; - RegNOpIdx = 0; - LnOpIdx = 3; - } - - // Set the conditional execution predicate - Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, RegNOpIdx); - - unsigned LaneShift; - if ((Binary & (1 << 22)) != 0) - LaneShift = 0; // 8-bit elements - else if ((Binary & (1 << 5)) != 0) - LaneShift = 1; // 16-bit elements - else - LaneShift = 2; // 32-bit elements - - unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; - unsigned Opc1 = Lane >> 2; - unsigned Opc2 = Lane & 3; - assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); - Binary |= (Opc1 << 21); - Binary |= (Opc2 << 5); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(1).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, 0); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd. 
- Binary |= encodeNEONRd(MI, 0); - // Immediate fields: Op, Cmode, I, Imm3, Imm4 - unsigned Imm = MI.getOperand(1).getImm(); - unsigned Op = (Imm >> 12) & 1; - unsigned Cmode = (Imm >> 8) & 0xf; - unsigned I = (Imm >> 7) & 1; - unsigned Imm3 = (Imm >> 4) & 0x7; - unsigned Imm4 = Imm & 0xf; - Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source register in Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VDUPfdf or VDUPfqf. - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source registers in Dn and Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRn(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VMOVDneon or VMOVQ. 
- emitWordLE(Binary); -} - -#include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ce264ee..29405eb 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -275,6 +275,7 @@ namespace { private: void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); @@ -382,7 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo *)MF->getTarget() + .getSubtargetImpl() + ->getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -529,7 +532,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getTarget().getDataLayout(); + const DataLayout &TD = *MF->getSubtarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -554,9 +557,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { InsPoint[a] = CPEMI; // Add a new CPEntry, but no corresponding CPUser yet. 
- std::vector<CPEntry> CPEs; - CPEs.push_back(CPEntry(CPEMI, i)); - CPEntries.push_back(CPEs); + CPEntries.emplace_back(1, CPEntry(CPEMI, i)); ++NumCPEs; DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " << Size << ", align = " << Align <<'\n'); @@ -566,7 +567,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { +bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; // Can't fall off end of function. @@ -574,12 +575,15 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { return false; MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; + if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) + return false; - return false; + // Try to analyze the end of the block. A potential fallthrough may already + // have an unconditional branch for whatever reason. + MachineBasicBlock *TBB, *FBB; + SmallVector<MachineOperand, 4> Cond; + bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + return TooDifficult || FBB == nullptr; } /// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, @@ -1203,7 +1207,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, unsigned Growth; if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) && + Growth < BestGrowth) { // This is the least amount of required padding seen so far. 
BestGrowth = Growth; WaterIter = IP; @@ -1309,7 +1314,12 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Back past any possible branches (allow for a conditional and a maximally // long unconditional). if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { - BaseInsertOffset = UserBBI.postOffset() - UPad - 8; + // Ensure BaseInsertOffset is larger than the offset of the instruction + // following UserMI so that the loop which searches for the split point + // iterates at least once. + BaseInsertOffset = + std::max(UserBBI.postOffset() - UPad - 8, + UserOffset + TII->GetInstSizeInBytes(UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1352,6 +1362,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (CC != ARMCC::AL) MI = LastIT; } + + // We really must not split an IT block. + DEBUG(unsigned PredReg; + assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL)); + NewMBB = splitBlockBeforeInstr(MI); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index c7a8415..13bef54 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H -#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 51d3dbb..2d80518 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -867,7 +867,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (RI.hasBasePointer(MF)) { int32_t NumBytes = 
AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); - assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { @@ -1343,8 +1343,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def index 1fef3b3..34ce85d 100644 --- a/lib/Target/ARM/ARMFPUName.def +++ b/lib/Target/ARM/ARMFPUName.def @@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3) ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) ARM_FPU_NAME("vfpv4", VFPV4) ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fpv5-d16", FPV5_D16) ARM_FPU_NAME("fp-armv8", FP_ARMV8) ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h index 2a64cce..86acffb 100644 --- a/lib/Target/ARM/ARMFPUName.h +++ b/lib/Target/ARM/ARMFPUName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMFPUNAME_H -#define ARMFPUNAME_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H +#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H namespace llvm { namespace ARM { @@ -23,4 +23,4 @@ enum FPUKind { } // namespace ARM } // namespace llvm -#endif // ARMFPUNAME_H +#endif diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e2d90cd..a5f635e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -92,11 +92,11 @@ class ARMFastISel final : public 
FastISel { public: explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo), - M(const_cast<Module&>(*funcInfo.Fn->getParent())), - TM(funcInfo.MF->getTarget()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { + : FastISel(funcInfo, libInfo), + M(const_cast<Module &>(*funcInfo.Fn->getParent())), + TM(funcInfo.MF->getTarget()), + TII(*TM.getSubtargetImpl()->getInstrInfo()), + TLI(*TM.getSubtargetImpl()->getTargetLowering()) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); isThumb2 = AFI->isThumbFunction(); @@ -105,39 +105,39 @@ class ARMFastISel final : public FastISel { // Code from FastISel.cpp. private: - unsigned FastEmitInst_r(unsigned MachineInstOpcode, + unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill); - unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + unsigned fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - unsigned FastEmitInst_i(unsigned MachineInstOpcode, + unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); // Backend specific FastISel code. 
private: - bool TargetSelectInstruction(const Instruction *I) override; - unsigned TargetMaterializeConstant(const Constant *C) override; - unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; - bool FastLowerArguments() override; + bool fastLowerArguments() override; private: #include "ARMGenFastISel.inc" @@ -189,7 +189,9 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); - const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + const TargetLowering *getTargetLowering() { + return TM.getSubtargetImpl()->getTargetLowering(); + } // Call handling routines. private: @@ -283,7 +285,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); @@ -305,7 +307,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { @@ -333,7 +335,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -365,7 
+367,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { @@ -391,7 +393,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -421,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -511,7 +513,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) - return false; + return 0; // If we can do this in a single instruction without a constant pool entry // do so now. @@ -534,7 +536,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { (ARM_AM::getSOImmVal(Imm) != -1); if (UseImm) { unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + const TargetRegisterClass *RC = isThumb2 ? 
&ARM::rGPRRegClass : + &ARM::GPRRegClass; + unsigned ImmReg = createResultReg(RC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(Imm)); @@ -542,11 +546,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { } } + unsigned ResultReg = 0; + if (Subtarget->useMovt(*FuncInfo.MF)) + ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + + if (ResultReg) + return ResultReg; + // Load from constant pool. For now 32-bit only. if (VT != MVT::i32) - return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + return 0; // MachineConstantPool wants an explicit alignment. unsigned Align = DL.getPrefTypeAlignment(C->getType()); @@ -555,21 +564,20 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { Align = DL.getTypeAllocSize(C->getType()); } unsigned Idx = MCP.getConstantPoolIndex(C, Align); - + ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::t2LDRpci), DestReg) - .addConstantPoolIndex(Idx)); + TII.get(ARM::t2LDRpci), ResultReg) + .addConstantPoolIndex(Idx)); else { // The extra immediate is for addrmode2. 
- DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); + ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::LDRcp), DestReg) - .addConstantPoolIndex(Idx) - .addImm(0)); + TII.get(ARM::LDRcp), ResultReg) + .addConstantPoolIndex(Idx) + .addImm(0)); } - - return DestReg; + return ResultReg; } unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { @@ -679,7 +687,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } -unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { +unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -698,7 +706,7 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF); -unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -901,7 +909,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. if (needsLowering) { - Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; } @@ -1074,7 +1082,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { unsigned ResultReg; if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1276,7 +1284,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? 
ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1301,7 +1309,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1309,7 +1317,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DbgLoc); + fastEmitBranch(Target, DbgLoc); return true; } @@ -1339,7 +1347,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1497,13 +1505,13 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); - unsigned ZeroReg = TargetMaterializeConstant(Zero); + unsigned ZeroReg = fastMaterializeConstant(Zero); // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) .addImm(ARMPred).addReg(ARM::CPSR); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } @@ -1522,7 +1530,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTDS), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1541,7 +1549,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTSD), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1585,7 +1593,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg).addReg(FP)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1617,7 +1625,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); if (IntReg == 0) return false; - UpdateValueMap(I, IntReg); + updateValueMap(I, IntReg); return true; } @@ -1693,7 +1701,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { .addImm(ARMCC::EQ) .addReg(ARM::CPSR); } - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1783,7 +1791,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1825,7 +1833,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1883,7 +1891,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned &NumBytes, bool isVarArg) { SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false, isVarArg)); @@ -1941,6 +1949,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Process the args. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + const Value *ArgVal = Args[VA.getValNo()]; unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; @@ -1967,7 +1976,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, break; } case CCValAssign::BCvt: { - unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, + unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; @@ -2001,6 +2010,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, } else { assert(VA.isMemLoc()); // Need to store on the stack. + + // Don't emit stores for undef values. + if (isa<UndefValue>(ArgVal)) + continue; + Address Addr; Addr.BaseType = Address::RegBase; Addr.Base.Reg = ARM::SP; @@ -2026,7 +2040,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // Now the return value. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); // Copy all of the result registers out of their specified physreg. 
@@ -2045,7 +2059,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[1].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); MVT CopyVT = RVLocs[0].getValVT(); @@ -2063,7 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[0].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } } @@ -2087,7 +2101,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */, F.isVarArg())); @@ -2192,7 +2206,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Can't handle non-double multi-reg retvals. 
if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2303,7 +2317,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2487,7 +2501,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { } const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo *>( + TM.getSubtargetImpl()->getRegisterInfo()); unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; @@ -2505,7 +2520,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { .addReg(SrcReg).addImm(0)); SrcReg = DestReg; } - UpdateValueMap(&I, SrcReg); + updateValueMap(&I, SrcReg); return true; } case Intrinsic::memcpy: @@ -2583,7 +2598,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { // Because the high bits are undefined, a truncate doesn't generate // any code. 
- UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -2745,7 +2760,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -2800,12 +2815,12 @@ bool ARMFastISel::SelectShift(const Instruction *I, } AddOptionalDefs(MIB); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // TODO: SoftFP support. -bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { +bool ARMFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: @@ -2983,7 +2998,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } -bool ARMFastISel::FastLowerArguments() { +bool ARMFastISel::fastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; @@ -3050,7 +3065,7 @@ bool ARMFastISel::FastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } return true; diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index e191a3c..0c910ab 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_FEATURES_H -#define TARGET_ARM_FEATURES_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H +#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H #include "MCTargetDesc/ARMMCTargetDesc.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index a67b360..80add7a 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) /// 
pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // iOS requires FP not to be clobbered for backtracing purpose. if (STI.isTargetIOS()) @@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, } static int sizeOfSPAdjustment(const MachineInstr *MI) { - assert(MI->getOpcode() == ARM::VSTMDDB_UPD); + int RegSize; + switch (MI->getOpcode()) { + case ARM::VSTMDDB_UPD: + RegSize = 8; + break; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + RegSize = 4; + break; + case ARM::t2STR_PRE: + case ARM::STR_PRE_IMM: + return 4; + default: + llvm_unreachable("Unknown push or pop like instruction"); + } + int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. 
for (int i = MI->getNumOperands() - 1; i >= 4; --i) - count += 8; + count += RegSize; return count; } @@ -154,6 +169,46 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, return StackSizeInBytes >= 4096; } +namespace { +struct StackAdjustingInsts { + struct InstInfo { + MachineBasicBlock::iterator I; + unsigned SPAdjust; + bool BeforeFPSet; + }; + + SmallVector<InstInfo, 4> Insts; + + void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, + bool BeforeFPSet = false) { + InstInfo Info = {I, SPAdjust, BeforeFPSet}; + Insts.push_back(Info); + } + + void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { + auto Info = std::find_if(Insts.begin(), Insts.end(), + [&](InstInfo &Info) { return Info.I == I; }); + assert(Info != Insts.end() && "invalid sp adjusting instruction"); + Info->SPAdjust += ExtraBytes; + } + + void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, + DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + unsigned CFAOffset = 0; + for (auto &Info : Insts) { + if (HasFP && !Info.BeforeFPSet) + return; + + CFAOffset -= Info.SPAdjust; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, std::next(Info.I), dl, + TII.get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + } + } +}; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -163,20 +218,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 
+ TM.getSubtargetImpl()->getRegisterInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = TM.getFrameLowering()->getStackAlignment(); + unsigned Align = + TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - int CFAOffset = 0; // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -189,15 +244,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; + StackAdjustingInsts DefCFAOffsetCandidates; + // Allocate the vararg register save area. 
if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); - CFAOffset -= ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); } if (!AFI->hasStackFrame() && @@ -205,11 +258,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); - CFAOffset -= NumBytes - ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), + NumBytes - ArgRegsSaveSize, true); } return; } @@ -252,21 +302,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past area 1. - MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, - DPRCSPush; - if (GPRCS1Size > 0) + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + } // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size - + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; + unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; + unsigned DPRAlign = DPRCSSize ? 
std::min(8U, Align) : 4U; + unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; + unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInPush = + MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -275,16 +327,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) + if (GPRCS2Size > 0) { GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our + // .cfi_offset operations will reflect that. + if (DPRGapSize) { + assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); + else { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, + MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + } + } // Move past area 3. if (DPRCSSize > 0) { - DPRCSPush = MBBI; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); LastPush = MBBI++; + } } // Move past the aligned DPRCS2 area. @@ -343,18 +411,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = 0; } - unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. 
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { - if (LastPush == GPRCS1Push) { - FramePtrOffsetInPush += NumBytes; - adjustedGPRCS1Size += NumBytes; - NumBytes = 0; - } - } else + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); + else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); + } if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 @@ -368,13 +433,40 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } - if (adjustedGPRCS1Size > 0) { - CFAOffset -= adjustedGPRCS1Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - MachineBasicBlock::iterator Pos = ++GPRCS1Push; - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. 
+ if (HasFP) { + MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, + dl, TII, FramePtr, ARM::SP, + PushSize + FramePtrOffsetInPush, + MachineInstr::FrameSetup); + if (FramePtrOffsetInPush + PushSize != 0) { + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), + -(ArgRegsSaveSize - FramePtrOffsetInPush))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + + // Now that the prologue's actual instructions are finalised, we can insert + // the necessary DWARF cf instructions to describe the situation. Start by + // recording where each register ended up: + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + int CFIIndex; for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -405,41 +497,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } } - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it - // is in area one and the adjustment needs to take place just after - // that push. 
- if (HasFP) { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII, - FramePtr, ARM::SP, FramePtrOffsetInPush, - MachineInstr::FrameSetup); - if (FramePtrOffsetInPush) { - CFAOffset += FramePtrOffsetInPush; - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - } else { - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } - if (GPRCS2Size > 0) { - MachineBasicBlock::iterator Pos = ++GPRCS2Push; - if (!HasFP) { - CFAOffset -= GPRCS2Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -465,17 +524,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. 
- do { - MachineBasicBlock::iterator Push = DPRCSPush++; - if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - + MachineBasicBlock::iterator Pos = std::next(LastPush); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -485,21 +534,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } - if (NumBytes) { - if (!HasFP) { - CFAOffset -= NumBytes; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } + // Now we can emit descriptions of where the canonical frame address was + // throughout the process. If we have a frame pointer, it takes over the job + // half-way through, so only the first few .cfi_def_cfa_offset instructions + // actually get emitted. + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -507,6 +552,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedGapSize(DPRGapSize); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. 
Be paranoid and make @@ -574,14 +620,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -609,6 +658,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= (ArgRegsSaveSize + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond @@ -657,6 +707,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } + if (AFI->getDPRCalleeSavedGapSize()) { + assert(AFI->getDPRCalleeSavedGapSize() == 4 && + "unexpected DPR alignment gap"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); + } + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } @@ -717,8 +773,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMBaseRegisterInfo *RegInfo = 
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); @@ -803,7 +859,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -876,7 +932,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); @@ -966,7 +1022,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. 
Since MFI computes @@ -1125,7 +1181,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Find the frame index assigned to d8. int D8SpillFI = 0; @@ -1340,12 +1396,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!RegInfo->canRealignStack(MF)) return; @@ -1384,10 +1443,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, unsigned NumGPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1550,7 +1609,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // of GPRs, spill one extra callee save GPR so we won't 
have to pad between // the integer and double callee save areas. unsigned TargetAlign = getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; @@ -1628,7 +1687,7 @@ void ARMFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -1746,7 +1805,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 709afbc..a83b773 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_FRAMEINFO_H -#define ARM_FRAMEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 0885c4e..0e4f81c 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -46,8 +46,8 @@ 
ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const MCInstrDesc &LastMCID = LastMI->getDesc(); const TargetMachine &TM = MI->getParent()->getParent()->getTarget(); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index a8198e2..ccf09db 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMHAZARDRECOGNIZER_H -#define ARMHAZARDRECOGNIZER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H +#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -46,4 +46,4 @@ public: } // end namespace llvm -#endif // ARMHAZARDRECOGNIZER_H +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 38547cf..6941579 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -425,7 +425,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { return true; if (Use->isMachineOpcode()) { const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( - CurDAG->getTarget().getInstrInfo()); + CurDAG->getSubtarget().getInstrInfo()); const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) @@ -526,8 +526,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -542,16 +541,15 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); + int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) RHSC = -RHSC; - if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -697,8 +695,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -718,8 +715,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -896,8 +892,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = 
CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -911,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -957,8 +951,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -975,8 +968,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1199,8 +1191,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1217,8 +1208,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int 
FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1266,8 +1256,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1296,8 +1285,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,8 +1314,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1392,10 +1379,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, OffReg = OffReg.getOperand(0); else { ShAmt = 0; - ShOpcVal = ARM_AM::no_shift; } - } else { - ShOpcVal = ARM_AM::no_shift; } } @@ -1425,7 +1409,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + Base = 
CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); @@ -2361,6 +2345,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } + + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { + unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); + unsigned LSB = 0; + if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && + !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) + return nullptr; + + if (LSB + Width > 32) + return nullptr; + + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width - 1, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + } + return nullptr; } @@ -2457,10 +2460,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } if (UseCP) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - getTargetLowering()->getPointerTy()); + SDValue CPIdx = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb()) { @@ -2490,12 +2492,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); + return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); @@ -2509,6 +2509,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; + case ISD::SIGN_EXTEND_INREG: case ISD::SRA: if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; @@ -3393,7 +3394,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); CurDAG->UpdateNodeOperands(GU, Ops); - GU = T1.getNode(); } else { // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4bfa5a8..0d0d81f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -70,9 +71,9 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, - LLVMContext &C, ParmContext PC) - : CCState(CC, isVarArg, MF, TM, locs, C) { + 
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + ParmContext PC) + : CCState(CC, isVarArg, MF, locs, C) { assert(((PC == Call) || (PC == Prologue)) && "ARMCCState users must specify whether their context is call" "or prologue generation."); @@ -155,19 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(const Triple &TT) { - if (TT.isOSBinFormatMachO()) - return new TargetLoweringObjectFileMachO(); - if (TT.isOSWindows()) - return new TargetLoweringObjectFileCOFF(); - return new ARMElfTargetObjectFile(); -} - -ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { +ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) + : TargetLowering(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -312,6 +305,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Conversions between floating types. // RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. @@ -387,6 +381,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } + // The half <-> float conversion functions are always soft-float, but are + // needed for some targets which use a hard-float calling convention by + // default. 
+ if (Subtarget->isAAPCS_ABI()) { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); + } else { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else @@ -394,10 +401,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); - if (!Subtarget->isFPOnlySP()) - addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + addRegisterClass(MVT::f64, &ARM::DPRRegClass); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -579,11 +583,50 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); + if (Subtarget->isFPOnlySP()) { + // When targetting a floating-point unit with only single-precision + // operations, f64 is legal for the few double-precision instructions which + // are present However, no double-precision operations other than moves, + // loads and stores are provided by the hardware. 
+ setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + } computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. 
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -716,8 +759,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. If we are targeting a single threaded system, + // then set them all for expand so we can lower them later into their + // non-atomic form. + if (TM.Options.ThreadModel == ThreadModel::Single) + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -725,7 +772,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } } else { @@ -825,10 +872,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. 
if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } @@ -836,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + if (Subtarget->getTargetTriple().isiOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -844,6 +898,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // FP-ARMv8 implements a lot of rounding-like FP operations. + if (Subtarget->hasFPARMv8()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (!Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -1119,7 +1190,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1256,8 +1328,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv, /* Return*/ true, isVarArg)); @@ -1417,8 +1489,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1510,7 +1582,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // True if this byval aggregate will be split between registers // and memory. 
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); - unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { @@ -1646,14 +1718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), + 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1679,7 +1754,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isDirect = true; bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { @@ -1740,7 +1815,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. 
if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable @@ -1940,17 +2016,30 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Subtarget->isThumb1Only()) return false; + // Externally-defined functions with weak linkage should not be + // tail-called on ARM when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->hasExternalWeakLinkage()) + return false; + } + // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. 
if (!CCMatch) { SmallVector<CCValAssign, 16> RVLocs1; - ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext(), Call); + ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, + *DAG.getContext(), Call); CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); SmallVector<CCValAssign, 16> RVLocs2; - ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext(), Call); + ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, + *DAG.getContext(), Call); CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); if (RVLocs1.size() != RVLocs2.size()) @@ -1984,8 +2073,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC, false, isVarArg)); if (CCInfo.getNextStackOffset()) { @@ -1995,7 +2084,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. 
MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2038,7 +2128,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, isVarArg)); } @@ -2086,8 +2176,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slots. - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, @@ -2098,6 +2188,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + AFI->setReturnRegsCount(RVLocs.size()); + // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); @@ -2216,9 +2310,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; - else + else { + // We are at the top of this chain. 
+ // If the copy has a glue operand, we conservatively assume it + // isn't safe to perform a tail call. + if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) + return false; // First CopyToReg TCChain = UseChain; + } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. @@ -2227,6 +2327,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; TCChain = Copy->getOperand(0); } else { return false; @@ -2567,9 +2671,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_rbit: { - assert(Op.getOperand(0).getValueType() == MVT::i32 && + assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -2626,7 +2730,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); - unsigned Domain = ARM_MB::ISH; + ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; @@ -2739,7 +2843,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? 
(4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... @@ -2916,8 +3023,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -2951,7 +3058,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (Flags.isByVal()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), Flags.getByValSize(), ExtraArgRegsSize, ExtraArgRegsSaveSize); @@ -2966,7 +3073,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } CCInfo.rewindByValRegsInfo(); lastInsIndex = -1; - if (isVarArg) { + if (isVarArg && MFI->hasVAStart()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, @@ -3075,7 +3182,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. 
if (Flags.isByVal()) { - unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( @@ -3108,7 +3215,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } // varargs - if (isVarArg) + if (isVarArg && MFI->hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); @@ -3130,6 +3237,18 @@ static bool isFloatingPointZero(SDValue Op) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } + } else if (Op->getOpcode() == ISD::BITCAST && + Op->getValueType(0) == MVT::f64) { + // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) + // created by LowerConstantFP(). + SDValue BitcastOp = Op->getOperand(0); + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { + SDValue MoveOp = BitcastOp->getOperand(0); + if (MoveOp->getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) { + return true; + } + } } return false; } @@ -3198,6 +3317,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl) const { + assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3313,9 +3433,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, - ARMcc, CCR, OverflowCmp); - + return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, + OverflowCmp, DAG); } // Convert: @@ -3349,7 +3468,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, 
SelectionDAG &DAG) const { SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); - return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } @@ -3419,6 +3538,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } +SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, + SDValue TrueVal, SDValue ARMcc, SDValue CCR, + SDValue Cmp, SelectionDAG &DAG) const { + if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), FalseVal); + TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), TrueVal); + + SDValue TrueLow = TrueVal.getValue(0); + SDValue TrueHigh = TrueVal.getValue(1); + SDValue FalseLow = FalseVal.getValue(0); + SDValue FalseHigh = FalseVal.getValue(1); + + SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, + ARMcc, CCR, Cmp); + SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, + ARMcc, CCR, duplicateCmp(Cmp, DAG)); + + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); + } else { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3428,6 +3573,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue FalseVal = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition @@ -3452,8 +3609,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -3468,12 +3624,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // select c, a, b // We only do this in unsafe-fp-math, because signed zeros and NaNs are // handled differently than the original code sequence. - if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && - RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + if (getTargetMachine().Options.UnsafeFPMath) { + if (LHS == TrueVal && RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } else if (LHS == FalseVal && RHS == TrueVal) { + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } } bool swpCmpOps = false; @@ -3492,14 +3654,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = 
getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMcc, CCR, Cmp); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMcc2, CCR, Cmp2); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } @@ -3632,6 +3792,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -3724,11 +3896,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -3778,11 +3962,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -4291,7 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); SDLoc dl(Op); 
- if (Op.getOperand(1).getValueType().isFloatingPoint()) { + if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: @@ -4555,6 +4751,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + // Use the default (constant pool) lowering for double constants when we have + // an SP-only FPU + if (IsDouble && Subtarget->isFPOnlySP()) + return SDValue(); + // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); @@ -5733,7 +5934,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -6088,7 +6289,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. 
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); @@ -6258,6 +6459,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); } } @@ -6294,7 +6497,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6377,9 +6581,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) - .addFrameIndex(FI) - .addImm(36)); // &jbuf[1] :: pc + BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) + .addFrameIndex(FI) + .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) @@ -6409,7 +6613,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6738,16 
+6943,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (SeenMBBs.insert(CurMBB)) + if (SeenMBBs.insert(CurMBB).second) DispContBB->addSuccessor(CurMBB); } // N.B. the order the invoke BBs are processed in doesn't matter here. const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; - for (SmallPtrSet<MachineBasicBlock*, 64>::iterator - I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { - MachineBasicBlock *BB = *I; + for (MachineBasicBlock *BB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. @@ -6926,7 +7129,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -7160,7 +7364,7 @@ MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); assert(Subtarget->isTargetWindows() && @@ -7216,8 +7420,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::R4, RegState::Kill))); + .addReg(ARM::SP).addReg(ARM::R4))); MI->eraseFromParent(); return MBB; @@ -7226,7 +7429,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7479,12 +7683,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (!MI->hasPostISelHook()) { - assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && - "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); - return; - } - const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. 
Coming out of isel, they have an implicit CPSR def, but the optional @@ -7496,8 +7694,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8398,10 +8596,11 @@ static SDValue PerformBFICombine(SDNode *N, /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -8432,8 +8631,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8596,7 +8793,7 @@ static SDValue PerformSTORECombine(SDNode *N, return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); + St->getAAInfo()); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node @@ -8616,7 +8813,8 @@ static bool hasNormalLoadOperand(SDNode *N) { /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. 
static SDValue PerformBUILD_VECTORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI){ + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, @@ -8922,7 +9120,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9001,7 +9199,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, @@ -9631,10 +9829,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); - case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: 
return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); @@ -9686,8 +9884,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, - bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9741,11 +9941,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -10348,6 +10549,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': + if (Subtarget->isThumb1Only()) + return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) @@ -10552,7 +10755,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && - "Invalid opcode for Div/Rem lowering"); + "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -10560,10 +10763,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, 
SelectionDAG &DAG) const { RTLIB::Libcall LC; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } SDValue InChain = DAG.getEntryNode(); @@ -10583,7 +10786,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); - Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); @@ -10611,7 +10814,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); @@ -10621,6 +10824,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } +SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_EXTEND"); + + RTLIB::Libcall LC; + LC = 
RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + +SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(0).getValueType() == MVT::f64 && + Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_ROUND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10648,7 +10876,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64) + if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -10775,31 +11003,154 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit - // anything for those. 
- bool IsMClass = Subtarget->isMClass(); - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && !IsMClass; - } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; +bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } + +Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, + ARM_MB::MemBOpt Domain) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + + // First, if the target has no DMB, see what fallback we can use. + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { + Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), + Builder.getInt32(0), Builder.getInt32(7), + Builder.getInt32(10), Builder.getInt32(5)}; + return Builder.CreateCall(MCR, args); + } else { + // Instead of using barriers, atomic accesses on these subtargets use + // libcalls. + llvm_unreachable("makeDMB on a target so old that it has no barriers"); + } + } else { + Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + // Only a full system barrier exists in the M-class architectures. + Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; + Constant *CDomain = Builder.getInt32(Domain); + return Builder.CreateCall(DMB, CDomain); + } +} + +// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case Monotonic: + case Acquire: + return nullptr; // Nothing to do + case SequentiallyConsistent: + if (!IsStore) + return nullptr; // Nothing to do + /*FALLTHROUGH*/ + case Release: + case AcquireRelease: + if (Subtarget->isSwift()) + return makeDMB(Builder, ARM_MB::ISHST); + // FIXME: add a comment with a link to documentation justifying this. + else + return makeDMB(Builder, ARM_MB::ISH); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case Monotonic: + case Release: + return nullptr; // Nothing to do + case Acquire: + case AcquireRelease: + case SequentiallyConsistent: + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. 
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. +// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that +// guarantee, see DDI0406C ARM architecture reference manual, +// sections A8.8.72-74 LDRD) +bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// For the real atomic operations, we have ldrex/strex up to 32 bits, +// and up to 64 bits on the non-M profiles +bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + return Size <= (Subtarget->isMClass() ? 32U : 64U); +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; +} + +bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + // If we do not have NEON, vector types are not natively supported. + if (!Subtarget->hasNEON()) + return false; - // For the real atomic operations, we have ldrex/strex up to 32 bits, - // and up to 64 bits on the non-M profiles - unsigned AtomicLimit = IsMClass ? 32 : 64; - return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; + // Floating point values and vector values map to the same register file. 
+ // Therefore, although we could do a store extract of a vector type, this is + better to leave at float as we have more freedom in the addressing mode for + those. + if (VectorTy->isFPOrFPVectorTy()) + return false; + + // If the index is unknown at compile time, this is very expensive to lower + and it is not possible to combine the store with the extract. + if (!isa<ConstantInt>(Idx)) + return false; + + assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + // We can do a store + vector extract on any vector that fits perfectly in a D + // or Q register. + if (BitWidth == 64 || BitWidth == 128) { + Cost = 0; + return true; + } + return false; } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); - bool IsAcquire = - Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsAcquire = isAtLeastAcquire(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a @@ -10835,8 +11186,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = - Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsRelease = isAtLeastRelease(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32".
We must marshal Val into the appropriate form @@ -10934,6 +11284,6 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); return result; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 1ace0f3..89b0c31 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMISELLOWERING_H -#define ARMISELLOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -232,7 +232,7 @@ namespace llvm { class ARMTargetLowering : public TargetLowering { public: - explicit ARMTargetLowering(TargetMachine &TM); + explicit ARMTargetLowering(const TargetMachine &TM); unsigned getJumpTableEncoding() const override; @@ -266,11 +266,12 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. Returns whether it /// is "fast" by reference in the second argument. 
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, + bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, @@ -391,12 +392,26 @@ namespace llvm { bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + bool hasLoadLinkedStoreConditional() const override; + Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const; Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicInIR(Instruction *Inst) const override; + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + bool useLoadStackGuardNode() const override; + + bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const override; protected: std::pair<const TargetRegisterClass*, uint8_t> @@ -473,6 +488,10 @@ namespace llvm { SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* 
RegName, EVT VT) const override; @@ -562,6 +581,9 @@ namespace llvm { bool mayBeEmittedAsTailCall(CallInst *CI) const override; + SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal, + SDValue ARMcc, SDValue CCR, SDValue Cmp, + SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 59e9260..7d27cf3 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -203,6 +203,16 @@ def msr_mask : Operand<i32> { let ParserMatchClass = MSRMaskOperand; } +def BankedRegOperand : AsmOperandClass { + let Name = "BankedReg"; + let ParserMethod = "parseBankedRegOperand"; +} +def banked_reg : Operand<i32> { + let PrintMethod = "printBankedRegOperand"; + let DecoderMethod = "DecodeBankedReg"; + let ParserMatchClass = BankedRegOperand; +} + // Shift Right Immediate - A shift right immediate is encoded differently from // other shift immediates. 
The imm6 field is encoded like so: // diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index f235ac2..17d1ffa 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -90,6 +90,49 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } +void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + MachineFunction &MF = *MI->getParent()->getParent(); + const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>(); + + if (!Subtarget.useMovt(MF)) { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM); + return; + } + + if (RM != Reloc::PIC_) { + expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM); + return; + } + + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + + if (!Subtarget.GVIsIndirectSymbol(GV, RM)) { + expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM); + return; + } + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + MachineInstrBuilder MIB; + + MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + namespace { /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for ARM ELF. 
@@ -113,8 +156,9 @@ namespace { ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - unsigned Align = TM->getDataLayout() - ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); + unsigned Align = + TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment( + Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); @@ -124,7 +168,7 @@ namespace { MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; - const TargetInstrInfo &TII = *TM->getInstrInfo(); + const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index b09958a..90f34ea 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTRUCTIONINFO_H -#define ARMINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" @@ -37,6 +37,10 @@ public: /// always be able to get register info as well (through this method). 
/// const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2bb8976..3177114 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -241,6 +241,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasVirtualization: Predicate<"false">, + AssemblerPredicate<"FeatureVirtualization", + "virtualization-extensions">; def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; @@ -633,6 +636,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { let ParserMatchClass = Imm32AsmOperand; } +def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>; + /// imm1_7 predicate - Immediate in the range [1,7]. 
def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { @@ -1961,7 +1966,7 @@ def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, } def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", - []>, Requires<[IsARM, HasV7]> { + [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> { bits<4> opt; let Inst{27-4} = 0b001100100000111100001111; let Inst{3-0} = opt; @@ -2708,7 +2713,8 @@ multiclass AI2_stridx<bit isByte, string opc, def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2720,7 +2726,8 @@ multiclass AI2_stridx<bit isByte, string opc, def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, ldst_so_reg:$addr), IndexModePre, StFrm, iir, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 1; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2733,7 +2740,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, StFrm, iir, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2751,7 +2758,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), IndexModePost, StFrm, iii, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2828,7 +2835,8 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), 
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<14> addr; let Inst{23} = addr{8}; // U bit let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm @@ -2841,7 +2849,8 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + "strh", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, addr_offset_none:$addr, am3offset:$offset))]> { @@ -3417,7 +3426,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), (SBCri GPR:$src, so_imm_not:$imm)>; def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), - (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -3932,14 +3942,12 @@ multiclass AI_smul<string opc, PatFrag opnode> { def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sext_inreg GPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sra GPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; } @@ -3981,17 +3989,13 @@ multiclass AI_smla<string opc, PatFrag opnode> 
{ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -4111,7 +4115,7 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, // Misc. Arithmetic Instructions. // -def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), +def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "clz", "\t$Rd, $Rm", [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, Sched<[WriteALU]>; @@ -4629,7 +4633,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Acquire || Ordering == SequentiallyConsistent; + return isAtLeastAcquire(Ordering); }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -4639,7 +4643,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Release || Ordering == SequentiallyConsistent; + return isAtLeastRelease(Ordering); }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -5060,12 +5064,31 @@ def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, let Unpredictable{11-0} = 0b110100001111; } +// However, 
the MRS (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = Rd; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-0} = 0b00000000; +} + // Move from ARM core register to Special Register // -// No need to have both system and application versions, the encodings are the -// same and the assembly parser has no way to distinguish between them. The mask -// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains -// the mask with the fields to be accessed in the special register. +// No need to have both system and application versions of MSR (immediate) or +// MSR (register), the encodings are the same and the assembly parser has no way +// to distinguish between them. The mask operand contains the special register +// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be +// accessed in the special register. def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []> { bits<5> mask; @@ -5093,6 +5116,25 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. 
+def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-4} = 0b0000; + let Inst{3-0} = Rn; +} + // Dynamic stack allocation yields a _chkstk for Windows targets. These calls // are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to @@ -5278,11 +5320,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), (SMULTB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), @@ -5305,13 +5342,6 @@ def : ARMV5MOPat<(add GPR:$acc, def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; // Pre-v7 uses MCR for synchronization barriers. @@ -5591,3 +5621,8 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // is discarded. 
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, ComplexDeprecationPredicate<"IT">; + +let mayLoad = 1, mayStore =1, hasSideEffects = 1 in +def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), + NoItinerary, + [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c02bb3b..a0c627c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -34,6 +34,14 @@ def nImmSplatI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } +def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } +def nImmSplatNotI16 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI16AsmOperand; +} +def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } +def nImmSplatNotI32 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI32AsmOperand; +} def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -4376,7 +4384,7 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", null_frag>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -5429,7 +5437,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]>, - Requires<[HasNEON, HasFastVGETLNi32]> { + Requires<[HasVFP2, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ 
-5497,8 +5505,12 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), IIC_VMOVISL, "vmov", "32", "$V$lane, $R", [(set DPR:$V, (insertelt (v2i32 DPR:$src1), - GPR:$R, imm:$lane))]> { + GPR:$R, imm:$lane))]>, + Requires<[HasVFP2]> { let Inst{21} = lane{0}; + // This instruction is equivalent as + // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) + let isInsertSubreg = 1; } } def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), @@ -6635,6 +6647,16 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// ... immediates +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e17f73a..a867844 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -311,7 +311,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", } def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> { + []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), let DecoderMethod = "DecodeThumbAddSpecialReg"; } +// Thumb1 frame lowering is rather fragile, we hope to be able to use +// tADDrSPi, but we may need to insert a sequence that clobbers CPSR. +def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset), + NoItinerary, []>, + Requires<[IsThumb, IsThumb1Only]> { + let Defs = [CPSR]; +} + // ADD sp, sp, #<imm7> def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, @@ -466,7 +474,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> { + Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -1355,7 +1363,7 @@ def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T]>; + Requires<[IsThumb, HasV5T, IsNotMClass]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 85e9351..807c252 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1262,15 
+1262,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; +// A simple right-shift can also be used in most cases (the exception is the +// SXTH operations with a rotate of 24: there the non-contiguous bits are +// relevant). 
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + // Zero extenders let AddedComplexity = 16 in { @@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; + +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; } + //===----------------------------------------------------------------------===// // Arithmetic Instructions. 
// @@ -2708,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2721,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2791,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2804,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -3291,7 +3305,8 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", - [], {?, ?, ?, ?}> { + [], {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3367,7 +3382,8 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), 
AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], - {?, ?, ?, ?}> { + {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3614,7 +3630,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]> { + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -3656,7 +3672,8 @@ let isBranch = 1, isTerminator = 1 in { // operands, create 3 versions of the same instruction. Once there's a clean // framework to represent optional operands, change this behavior. class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, - !strconcat("cps", asm_op), []> { + !strconcat("cps", asm_op), []>, + Requires<[IsThumb2, IsNotMClass]> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -3702,7 +3719,8 @@ def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { +def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", + [(int_arm_dbg imm0_15:$opt)]> { bits<4> opt; let Inst{31-20} = 0b111100111010; let Inst{19-16} = 0b1111; @@ -3739,7 +3757,8 @@ def t2DCPS3 : T2DCPS<0b11, "dcps3">; class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { bits<5> mode; let Inst{31-25} = 0b1110100; let Inst{24-23} = Op; @@ -3770,7 +3789,8 @@ def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; // Return From Exception is a system instruction. 
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-20} = op31_20{11-0}; bits<4> Rn; @@ -3797,7 +3817,7 @@ let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, "subs", "\tpc, lr, $imm", [(ARMintretflag imm0_255:$imm)]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-8} = 0b111100111101111010001111; bits<8> imm; @@ -3941,10 +3961,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>; //===----------------------------------------------------------------------===// @@ -3960,7 +3980,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + let Inst{7-0} = 0b00000000; } def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; @@ -3970,22 +3990,41 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + 
let Inst{7-0} = 0b00000000; +} + +def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{31-21} = 0b11110011111; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1000; + let Inst{11-8} = Rd; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; } + // M class MRS. // // This MRS has a mask field in bits 7-0 and can take more values than // the A/R class (a full msr_mask). -def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, - "mrs", "\t$Rd, $mask", []>, +def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary, + "mrs", "\t$Rd, $SYSm", []>, Requires<[IsThumb,IsMClass]> { bits<4> Rd; - bits<8> mask; + bits<8> SYSm; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{19-16} = 0b1111; - let Inst{7-0} = mask; + let Inst{7-0} = SYSm; + + let Unpredictable{20-16} = 0b11111; + let Unpredictable{13} = 0b1; } @@ -4010,6 +4049,25 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), let Inst{7-0} = 0; } +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{31-21} = 0b11110011100; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; + let Inst{11-8} = banked{3-0}; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; +} + + // M class MSR. 
// // Move from ARM core register to Special Register @@ -4022,7 +4080,13 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), let Inst{20} = 0b0; let Inst{19-16} = Rn; let Inst{15-12} = 0b1000; - let Inst{11-0} = SYSm; + let Inst{11-10} = SYSm{11-10}; + let Inst{9-8} = 0b00; + let Inst{7-0} = SYSm{7-0}; + + let Unpredictable{20} = 0b1; + let Unpredictable{13} = 0b1; + let Unpredictable{9-8} = 0b11; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 1d7802a..d78f2ac 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. @@ -551,12 +553,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : Pat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : Pat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; @@ -619,26 +615,42 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{5} = Dm{4}; } -multiclass vcvt_inst<string opc, bits<2> rm> { +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + +multiclass vcvt_inst<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = 
"VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -652,7 +664,8 @@ multiclass vcvt_inst<string opc, bits<2> rm> { def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -665,10 +678,10 @@ multiclass vcvt_inst<string opc, bits<2> rm> { } } -defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTA : vcvt_inst<"a", 0b00, frnd>; defm VCVTN : vcvt_inst<"n", 0b01>; -defm VCVTP : vcvt_inst<"p", 0b10>; -defm VCVTM : vcvt_inst<"m", 0b11>; +defm VCVTP : vcvt_inst<"p", 0b10, fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -684,18 +697,20 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } -multiclass vrint_inst_zrx<string opc, bit op, bit op2> { +multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, 
!strconcat("vrint", opc), ".f32\t$Sd, $Sm", - []>, Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; let Inst{16} = op; } @@ -708,22 +723,25 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; -defm VRINTR : vrint_inst_zrx<"r", 0, 0>; -defm VRINTX : vrint_inst_zrx<"x", 1, 0>; +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; -multiclass vrint_inst_anpm<string opc, bits<2> rm> { +multiclass vrint_inst_anpm<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{17-16} = rm; } } @@ -736,10 +754,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; defm VRINTN : vrint_inst_anpm<"n", 0b01>; -defm VRINTP : vrint_inst_anpm<"p", 0b10>; -defm VRINTM : vrint_inst_anpm<"m", 0b11>; +defm VRINTP : 
vrint_inst_anpm<"p", 0b10, fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -830,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Rt = EXTRACT_SUBREG $Dm, ssub_0 + // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1 + let isExtractSubreg = 1; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -878,6 +901,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1 + let isRegSequence = 1; } let neverHasSideEffects = 1 in diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp deleted file mode 100644 index 6d1114d..0000000 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ /dev/null @@ -1,344 +0,0 @@ -//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the JIT interfaces for the ARM target. 
-// -//===----------------------------------------------------------------------===// - -#include "ARMJITInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdlib> -using namespace llvm; - -#define DEBUG_TYPE "jit" - -void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); -} - -/// JITCompilerFunction - This contains the address of the JIT function used to -/// compile a function lazily. -static TargetJITInfo::JITCompilerFn JITCompilerFunction; - -// Get the ASMPREFIX for the current host. This is often '_'. -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ -#endif -#define GETASMPREFIX2(X) #X -#define GETASMPREFIX(X) GETASMPREFIX2(X) -#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) - -// CompilationCallback stub - We can't use a C function with inline assembly in -// it, because the prolog/epilog inserted by GCC won't work for us. (We need -// to preserve more context and manipulate the stack directly). Instead, -// write our own wrapper, which does things our way, so we have complete -// control over register saving and restoring. -extern "C" { -#if defined(__arm__) - void ARMCompilationCallback(); - asm( - ".text\n" - ".align 2\n" - ".globl " ASMPREFIX "ARMCompilationCallback\n" - ASMPREFIX "ARMCompilationCallback:\n" - // Save caller saved registers since they may contain stuff - // for the real target function right now. We have to act as if this - // whole compilation callback doesn't exist as far as the caller is - // concerned, so we can't just preserve the callee saved regs. 
- "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // The LR contains the address of the stub function on entry. - // pass it as the argument to the C part of the callback - "mov r0, lr\n" - "sub sp, sp, #4\n" - // Call the C portion of the callback - "bl " ASMPREFIX "ARMCompilationCallbackC\n" - "add sp, sp, #4\n" - // Restoring the LR to the return address of the function that invoked - // the stub and de-allocating the stack space for it requires us to - // swap the two saved LR values on the stack, as they're backwards - // for what we need since the pop instruction has a pre-determined - // order for the registers. - // +--------+ - // 0 | LR | Original return address - // +--------+ - // 1 | LR | Stub address (start of stub) - // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) - // 6-20 | D0..D7 | Saved VFP registers - // +--------+ - // -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - // Restore VFP caller-saved registers. - "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // - // We need to exchange the values in slots 0 and 1 so we can - // return to the address in slot 1 with the address in slot 0 - // restored to the LR. - "ldr r0, [sp,#20]\n" - "ldr r1, [sp,#16]\n" - "str r1, [sp,#20]\n" - "str r0, [sp,#16]\n" - // Return to the (newly modified) stub to invoke the real function. - // The above twiddling of the saved return addresses allows us to - // deallocate everything, including the LR the stub saved, with two - // updating load instructions. - "ldmia sp!, {r0, r1, r2, r3, lr}\n" - "ldr pc, [sp], #4\n" - ); -#else // Not an ARM host - void ARMCompilationCallback() { - llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); - } -#endif -} - -/// ARMCompilationCallbackC - This is the target-specific function invoked -/// by the function stub when we did not know the real target of a call. 
-/// This function must locate the start of the stub or call site and pass -/// it into the JIT compiler function. -extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { - // Get the address of the compiled code for this function. - intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); - - // Rewrite the call target... so that we don't end up here every time we - // execute the call. We're replacing the first two instructions of the - // stub with: - // ldr pc, [pc,#-4] - // <addr> - if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] - *(intptr_t *)(StubAddr+4) = NewVal; - if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } -} - -TargetJITInfo::LazyResolverFn -ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { - JITCompilerFunction = F; - return ARMCompilationCallback; -} - -void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, - JITCodeEmitter &JCE) { - uint8_t Buffer[4]; - uint8_t *Cur = Buffer; - MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr); - void *PtrAddr = JCE.allocIndirectGV( - GV, Buffer, sizeof(Buffer), /*Alignment=*/4); - addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); - return PtrAddr; -} - -TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { - // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a - // 4-byte address. See emitFunctionStub for details. - StubLayout Result = {16, 4}; - return Result; -} - -void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { - void *Addr; - // If this is just a call to an external function, emit a branch instead of a - // call. The code is the same except for one bit of the last instruction. - if (Fn != (void*)(intptr_t)ARMCompilationCallback) { - // Branch to the corresponding function addr. 
- if (IsPIC) { - // The stub is 16-byte size and 4-aligned. - intptr_t LazyPtr = getIndirectSymAddr(Fn); - if (!LazyPtr) { - // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); - DEBUG(if (F) - errs() << "JIT: Indirect symbol emitted at [" << LazyPtr - << "] for GV '" << F->getName() << "'\n"; - else - errs() << "JIT: Stub emitted at [" << LazyPtr - << "] for external function at '" << Fn << "'\n"); - } - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] - JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip - JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] - JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } else { - // The stub is 8-byte size and 4-aligned. - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - JCE.emitWordLE((intptr_t)Fn); // addr of function - sys::Memory::InvalidateInstructionCache(Addr, 8); - if (!sys::Memory::setRangeExecutable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - } else { - // The compilation callback will overwrite the first two words of this - // stub with indirect branch instructions targeting the compiled code. - // This stub sets the return address to restart the stub, so that - // the new branch will be invoked when we come back. - // - // Branch and link to the compilation callback. - // The stub is 16-byte size and 4-byte aligned. 
- JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - // Save LR so the callback can determine which stub called it. - // The compilation callback is responsible for popping this prior - // to returning. - JCE.emitWordLE(0xe92d4000); // push {lr} - // Set the return address to go back to the start of this stub. - JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 - // Invoke the compilation callback. - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - // The address of the compilation callback. - JCE.emitWordLE((intptr_t)ARMCompilationCallback); - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - - return Addr; -} - -intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { - ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); - switch (RT) { - default: - return (intptr_t)(MR->getResultPointer()); - case ARM::reloc_arm_pic_jt: - // Destination address - jump table base. - return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); - case ARM::reloc_arm_jt_base: - // Jump table base address. - return getJumpTableBaseAddr(MR->getJumpTableIndex()); - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - // Constant pool entry address. 
- return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); - case ARM::reloc_arm_machine_cp_entry: { - ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); - assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && - "Can't handle this machine constant pool entry yet!"); - intptr_t Addr = (intptr_t)(MR->getResultPointer()); - Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); - return Addr; - } - } -} - -/// relocate - Before the JIT can run a block of code that has been emitted, -/// it must rewrite the code to contain the actual addresses of any -/// referenced global symbols. -void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { - for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { - void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); - intptr_t ResultPtr = resolveRelocDestAddr(MR); - switch ((ARM::RelocationType)MR->getRelocationType()) { - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_relative: { - // It is necessary to calculate the correct PC relative value. We - // subtract the base addr from the target addr to form a byte offset. - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - // If the result is positive, set bit U(23) to 1. - if (ResultPtr >= 0) - *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; - else { - // Otherwise, obtain the absolute value and set bit U(23) to 0. - *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); - ResultPtr = - ResultPtr; - } - // Set the immed value calculated. - // VFP immediate offset is multiplied by 4. - if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) - ResultPtr = ResultPtr >> 2; - *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC (which is register 15 on all architectures). - // FIXME: This avoids the need for register info in the JIT class. 
- *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; - break; - } - case ARM::reloc_arm_pic_jt: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_absolute: { - // These addresses have already been resolved. - *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; - break; - } - case ARM::reloc_arm_branch: { - // It is necessary to calculate the correct value of signed_immed_24 - // field. We subtract the base addr from the target addr to form a - // byte offset, which must be inside the range -33554432 and +33554428. - // Then, we set the signed_immed_24 field of the instruction to bits - // [25:2] of the byte offset. More details ARM-ARM p. A4-11. - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; - assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_jt_base: { - // JT base - (instruction addr + 8) - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - } - } -} - -void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; -} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h deleted file mode 100644 index 27e2a20..0000000 --- a/lib/Target/ARM/ARMJITInfo.h +++ /dev/null @@ -1,177 +0,0 @@ -//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure 
-// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARMJITInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMJITINFO_H -#define ARMJITINFO_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetJITInfo.h" - -namespace llvm { - class ARMTargetMachine; - - class ARMJITInfo : public TargetJITInfo { - // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding - // CONSTPOOL_ENTRY addresses. - SmallVector<intptr_t, 16> ConstPoolId2AddrMap; - - // JumpTableId2AddrMap - A map from inline jumptable ids to the - // corresponding inline jump table bases. - SmallVector<intptr_t, 16> JumpTableId2AddrMap; - - // PCLabelMap - A map from PC labels to addresses. - DenseMap<unsigned, intptr_t> PCLabelMap; - - // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) - // addresses to their indirect symbol addresses. - DenseMap<void*, intptr_t> Sym2IndirectSymMap; - - // IsPIC - True if the relocation model is PIC. This is used to determine - // how to codegen function stubs. - bool IsPIC; - - public: - explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. 
- /// - void replaceMachineCodeForFunction(void *Old, void *New) override; - - /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object - /// to emit an indirect symbol which contains the address of the specified - /// ptr. - void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, - JITCodeEmitter &JCE) override; - - // getStubLayout - Returns the size and alignment of the largest call stub - // on ARM. - StubLayout getStubLayout() override; - - /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a - /// small native function that simply calls the function at the specified - /// address. - void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) override; - - /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; - - /// relocate - Before the JIT can run a block of code that has been emitted, - /// it must rewrite the code to contain the actual addresses of any - /// referenced global symbols. - void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) override; - - /// hasCustomConstantPool - Allows a target to specify that constant - /// pool address resolution is handled by the target. - bool hasCustomConstantPool() const override { return true; } - - /// hasCustomJumpTables - Allows a target to specify that jumptables - /// are emitted by the target. - bool hasCustomJumpTables() const override { return true; } - - /// allocateSeparateGVMemory - If true, globals should be placed in - /// separately allocated heap memory rather than in the same - /// code memory allocated by JITCodeEmitter. - bool allocateSeparateGVMemory() const override { -#ifdef __APPLE__ - return true; -#else - return false; -#endif - } - - /// Initialize - Initialize internal stage for the function being JITted. 
- /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize - /// jump table ids to jump table bases map; remember if codegen relocation - /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC); - - /// getConstantPoolEntryAddr - The ARM target puts all constant - /// pool entries into constant islands. This returns the address of the - /// constant pool entry of the specified index. - intptr_t getConstantPoolEntryAddr(unsigned CPI) const { - assert(CPI < ConstPoolId2AddrMap.size()); - return ConstPoolId2AddrMap[CPI]; - } - - /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address - /// where its associated value is stored. When relocations are processed, - /// this value will be used to resolve references to the constant. - void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { - assert(CPI < ConstPoolId2AddrMap.size()); - ConstPoolId2AddrMap[CPI] = Addr; - } - - /// getJumpTableBaseAddr - The ARM target inline all jump tables within - /// text section of the function. This returns the address of the base of - /// the jump table of the specified index. - intptr_t getJumpTableBaseAddr(unsigned JTI) const { - assert(JTI < JumpTableId2AddrMap.size()); - return JumpTableId2AddrMap[JTI]; - } - - /// addJumpTableBaseAddr - Map a jump table index to the address where - /// the corresponding inline jump table is emitted. When relocations are - /// processed, this value will be used to resolve references to the - /// jump table. - void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { - assert(JTI < JumpTableId2AddrMap.size()); - JumpTableId2AddrMap[JTI] = Addr; - } - - /// getPCLabelAddr - Retrieve the address of the PC label of the - /// specified id. - intptr_t getPCLabelAddr(unsigned Id) const { - DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); - assert(I != PCLabelMap.end()); - return I->second; - } - - /// addPCLabelAddr - Remember the address of the specified PC label. 
- void addPCLabelAddr(unsigned Id, intptr_t Addr) { - PCLabelMap.insert(std::make_pair(Id, Addr)); - } - - /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the - /// specified symbol located at address. Returns 0 if the indirect symbol - /// has not been emitted. - intptr_t getIndirectSymAddr(void *Addr) const { - DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr); - if (I != Sym2IndirectSymMap.end()) - return I->second; - return 0; - } - - /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to - /// its indirect symbol address. - void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { - Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); - } - - private: - /// resolveRelocDestAddr - Resolve the resulting address of the relocation - /// if it's not already solved. Constantpool entries must be resolved by - /// ARM target. - intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; - }; -} - -#endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a03bcdb..c429ac1 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -144,6 +144,46 @@ namespace { char ARMLoadStoreOpt::ID = 0; } +static bool definesCPSR(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. 
+ return true; + } + + return false; +} + +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) + return OffField; + + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) + : ARM_AM::getAM5Op(OffField); + + if (Op == ARM_AM::sub) + return -Offset; + + return Offset; +} + static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); - - // Start updating any instructions with immediate offsets. Insert a sub before + // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). 
for (; MBBI != MBB.end(); ++MBBI) { + bool InsertSub = false; + unsigned Opc = MBBI->getOpcode(); + if (MBBI->readsRegister(Base)) { - unsigned Opc = MBBI->getOpcode(); int Offset; - bool InsertSub = false; + bool IsLoad = + Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; + bool IsStore = + Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; - if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || - Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || - Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + if (IsLoad || IsStore) { // Loads and stores with immediate offsets can be updated, but only if // the new offset isn't negative. // The MachineOperand containing the offset immediate is the last one // before predicates. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - // The offsets are scaled by 1, 2 or 4 depending on the Opcode + // The offsets are scaled by 1, 2 or 4 depending on the Opcode. Offset = MO.getImm() - WordOffset * getImmScale(Opc); - if (Offset >= 0) + + // If storing the base register, it needs to be reset first. + unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + + if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); else InsertSub = true; - } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { - // SUB/ADD using this register. Merge it with the update. - // If the merged offset is too large, insert a new sub instead. + } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && + !definesCPSR(MBBI)) { + // SUBS/ADDS using this register, with a dead def of the CPSR. + // Merge it with the update; if the merged offset is too large, + // insert a new sub instead. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); Offset = (Opc == ARM::tSUBi8) ? 
MO.getImm() + WordOffset * 4 : MO.getImm() - WordOffset * 4 ; - if (TL->isLegalAddImmediate(Offset)) { + if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { + // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if + // Offset == 0. MO.setImm(Offset); // The base register has now been reset, so exit early. return; @@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - if (InsertSub) { - // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); - return; - } + } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + // Since SUBS sets the condition flags, we can't place the base reset + // after an instruction that has a live CPSR def. + // The base register might also contain an argument for a function call. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; } if (MBBI->killsRegister(Base)) @@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, return; } - // The end of the block was reached. This means register liveness escapes the - // block, and it's necessary to insert a sub before the last instruction. - if (MBB.succ_size() > 0) - // But only insert the SUB if there is actually a successor block. - // FIXME: Check more carefully if register is live at this point, e.g. by - // also examining the successor block's register liveness information. - AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + // End of block was reached. 
+ if (MBB.succ_size() > 0) { + // FIXME: Because of a bug, live registers are sometimes missing from + // the successor blocks' live-in sets. This means we can't trust that + // information and *always* have to reset at the end of a block. + // See PR21029. + if (MBBI != MBB.end()) --MBBI; + AddDefaultT1CC( + BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) .addImm(Pred).addReg(PredReg); + } } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 1) return false; + // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. + // Compute liveness information for that register to make the decision. + bool SafeToClobberCPSR = !isThumb1 || + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + MachineBasicBlock::LQR_Dead); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. + // It's also not possible to merge an STR of the base register in Thumb1. + if (isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + if (Opcode == ARM::tLDRi) { + Writeback = false; + break; + } else if (Opcode == ARM::tSTRi) { + return false; + } + } + ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); @@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 2) return false; + // On Thumb1, it's not worth materializing a new base register without + // clobbering the CPSR (i.e. not using ADDS/SUBS). 
+ if (!SafeToClobberCPSR) + return false; + unsigned NewBase; if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to @@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -473,22 +561,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADDS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. 
} @@ -501,16 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. - - // Exception: If the base register is in the input reglist, Thumb1 LDM is - // non-writeback. Check for this. - if (Opcode == ARM::tLDMIA && isThumb1) - for (unsigned I = 0; I < NumRegs; ++I) - if (Base == Regs[I].first) { - Writeback = false; - break; - } + // Check if a Thumb1 LDM/STM merge is safe. This is the case if: + // - There is no writeback (LDM of base register), + // - the base register is killed by the merged instruction, + // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS + // to reset the base register. + // Otherwise, don't merge. + // It's safe to return here since the code to materialize a new base register + // above is also conditional on SafeToClobberCPSR. + if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) + return false; MachineInstrBuilder MIB; @@ -525,11 +619,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. + // The base isn't dead after a merged instruction with writeback. + // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -700,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, memOps[i].MBBI = Merges.back(); memOps[i].Position = insertPos; } + + // Update memOps offsets, since they may have been modified by MergeOps. 
+ for (auto &MemOp : memOps) { + MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); + } } /// MergeLDR_STR - Merge a number of load / store instructions into one or more @@ -721,7 +820,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -760,36 +859,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. + BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); } -static bool definesCPSR(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) - // If the instruction has live CPSR def, then it's not safe to fold it - // into load / store. 
- return true; - } - - return false; -} - static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg) { @@ -1327,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(std::prev(Loc)); } -static int getMemoryOpOffset(const MachineInstr *MI) { - int Opcode = MI->getOpcode(); - bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); - - if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || - Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || - Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) - return OffField; - - // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) - return OffField * 4; - - int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM3) { - if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } - return Offset; -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, @@ -1725,21 +1788,15 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); - TL = TM.getTargetLowering(); + TL = TM.getSubtargetImpl()->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // FIXME: Temporarily disabling for 
Thumb-1 due to miscompiles - if (isThumb1) { - delete RS; - return false; - } - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { @@ -1793,10 +1850,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); - TII = Fn.getTarget().getInstrInfo(); - TRI = Fn.getTarget().getRegisterInfo(); - STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + TD = Fn.getSubtarget().getDataLayout(); + TII = Fn.getSubtarget().getInstrInfo(); + TRI = Fn.getSubtarget().getRegisterInfo(); + STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1811,7 +1868,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallPtrSetImpl<MachineInstr*> &MemOps, SmallSet<unsigned, 4> &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? 
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 44a9e34..4e67fa1 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMACHINEFUNCTIONINFO_H -#define ARMMACHINEFUNCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -47,6 +48,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// unsigned ArgRegsSaveSize; + /// ReturnRegsCount - Number of registers used up in the return. + unsigned ReturnRegsCount; + /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; @@ -82,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// areas. unsigned GPRCS1Size; unsigned GPRCS2Size; + unsigned DPRCSAlignGapSize; unsigned DPRCSSize; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in @@ -118,14 +123,19 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// being passed on the stack unsigned ArgumentStackSize; + /// CoalescedWeights - mapping of basic blocks to the rolling counter of + /// coalesced weights. 
+ DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights; + public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} @@ -146,6 +156,9 @@ public: } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } + unsigned getReturnRegsCount() const { return ReturnRegsCount; } + void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -171,10 +184,12 @@ public: unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } unsigned getArgumentStackSize() const { return ArgumentStackSize; } @@ -221,7 +236,16 @@ public: else return -1U; } + + DenseMap<const MachineBasicBlock*, unsigned>::iterator getCoalescedWeight( + MachineBasicBlock* MBB) { + auto It = CoalescedWeights.find(MBB); + if (It == CoalescedWeights.end()) { + It = CoalescedWeights.insert(std::make_pair(MBB, 0)).first; + } + return It; + } }; } // End llvm namespace -#endif // ARMMACHINEFUNCTIONINFO_H +#endif diff --git 
a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index efa22fb..3ff0bee 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H +#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H + // 31 entries have cost 0 // 242 entries have cost 1 // 1447 entries have cost 2 @@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = { 835584U, // <u,u,u,u>: Cost 0 copy LHS 0 }; + +#endif diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 3e6af3f..b623173 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMREGISTERINFO_H -#define ARMREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H #include "ARMBaseRegisterInfo.h" diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h deleted file mode 100644 index 21877fd..0000000 --- a/lib/Target/ARM/ARMRelocations.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM target-specific relocation types. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARMRELOCATIONS_H -#define ARMRELOCATIONS_H - -#include "llvm/CodeGen/MachineRelocation.h" - -namespace llvm { - namespace ARM { - enum RelocationType { - // reloc_arm_absolute - Absolute relocation, just add the relocated value - // to the value already in memory. - reloc_arm_absolute, - - // reloc_arm_relative - PC relative relocation, add the relocated value to - // the value already in memory, after we adjust it for where the PC is. - reloc_arm_relative, - - // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose - // addresses are kept locally in a map. - reloc_arm_cp_entry, - - // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset - // should be divided by 4. - reloc_arm_vfp_cp_entry, - - // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool - // entry. - reloc_arm_machine_cp_entry, - - // reloc_arm_jt_base - PC relative relocation for jump tables whose - // addresses are kept locally in a map. - reloc_arm_jt_base, - - // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base. - reloc_arm_pic_jt, - - // reloc_arm_branch - Branch address relocation. - reloc_arm_branch, - - // reloc_arm_movt - MOVT immediate relocation. - reloc_arm_movt, - - // reloc_arm_movw - MOVW immediate relocation. 
- reloc_arm_movw - }; - } -} - -#endif - diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 3dcc0df..fa30ac3 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -157,7 +157,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, return SDValue(); const ARMTargetLowering &TLI = - *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering()); + *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 13769dc..94b98e6 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSELECTIONDAGINFO_H -#define ARMSELECTIONDAGINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Target/TargetSelectionDAGInfo.h" diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0eb24ef..600f39d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,9 +15,9 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" +#include "ARMMachineFunctionInfo.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" @@ -27,6 +27,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -47,11 +49,13 @@ static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), 
cl::Hidden); +namespace { enum AlignMode { DefaultAlign, StrictAlign, NoStrictAlign }; +} static cl::opt<AlignMode> Align(cl::desc("Load/store alignment support"), @@ -98,11 +102,6 @@ static std::string computeDataLayout(ARMSubtarget &ST) { // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; - // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to - // align to 32. - if (ST.isThumb()) - Ret += "-i1:8:32-i8:8:32-i16:16:32"; - // ABIs other than APCS have 64 bit integers with natural alignment. if (!ST.isAPCS_ABI()) Ret += "-i64:64"; @@ -119,10 +118,9 @@ static std::string computeDataLayout(ARMSubtarget &ST) { else Ret += "-v128:64:128"; - // On thumb and APCS, only try to align aggregates to 32 bits (the default is - // 64 bits). - if (ST.isThumb() || ST.isAPCS_ABI()) - Ret += "-a:0:32"; + // Try to align aggregates to 32 bits (the default is 64 bits, which has no + // particular hardware support on 32-bit ARM). + Ret += "-a:0:32"; // Integer registers are 32 bits. Ret += "-n32"; @@ -144,18 +142,18 @@ static std::string computeDataLayout(ARMSubtarget &ST) { ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + initSubtargetFeatures(CPU, FS); return *this; } ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, - bool IsLittle, const TargetOptions &Options) + const std::string &FS, const TargetMachine &TM, + bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), + TargetTriple(TT), Options(TM.Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), JITInfo(), + TSInfo(DL), InstrInfo(isThumb1Only() ? 
(ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() @@ -188,7 +186,6 @@ void ARMSubtarget::initializeEnvironment() { InThumbMode = false; HasThumb2 = false; NoARM = false; - PostRAScheduler = false; IsR9Reserved = ReserveR9; UseMovt = false; SupportsTailCall = false; @@ -217,23 +214,7 @@ void ARMSubtarget::initializeEnvironment() { UseLong64 = false; } -void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { - AttributeSet FnAttrs = MF->getFunction()->getAttributes(); - Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-cpu"); - Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-features"); - std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; - std::string FS = - !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); - } -} - -void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { +void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) // Default to the Swift CPU when targeting armv7s/thumbv7s. 
@@ -275,9 +256,8 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { TargetABI = ARM_ABI_AAPCS; break; default: - if ((isTargetIOS() && isMClass()) || - (TargetTriple.isOSBinFormatMachO() && - TargetTriple.getOS() == Triple::UnknownOS)) + if (TargetTriple.isOSBinFormatMachO() && + TargetTriple.getOS() == Triple::UnknownOS) TargetABI = ARM_ABI_AAPCS; else TargetABI = ARM_ABI_APCS; @@ -299,49 +279,39 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { UseMovt = hasV6T2Ops() && ArmUseMOVT; if (isTargetMachO()) { - IsR9Reserved = ReserveR9 | !HasV6Ops; + IsR9Reserved = ReserveR9 || !HasV6Ops; SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); } else { IsR9Reserved = ReserveR9; SupportsTailCall = !isThumb1Only(); } - if (!isThumb() || hasThumb2()) - PostRAScheduler = true; - - switch (Align) { - case DefaultAlign: - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - // The one exception is cortex-m0, which despite being v6, does not - // support unaligned accesses. Rather than make the above boolean - // expression even more obtuse, just override the value here. 
- if (isThumb1Only() && isMClass()) - AllowsUnalignedMem = false; - break; - case StrictAlign: - AllowsUnalignedMem = false; - break; - case NoStrictAlign: - AllowsUnalignedMem = true; - break; + if (Align == DefaultAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin and NetBSD targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = + (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || + isTargetNetBSD())) || + (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); + } else { + AllowsUnalignedMem = !(Align == StrictAlign); } + // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) + if (isV6M()) + AllowsUnalignedMem = false; + switch (IT) { case DefaultIT: RestrictIT = hasV8Ops() ? true : false; @@ -368,11 +338,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - // Materializable GVs (in JIT lazy compilation mode) do not require an extra - // load from stub. - bool isDecl = GV->hasAvailableExternallyLinkage(); - if (GV->isDeclaration() && !GV->isMaterializable()) - isDecl = true; + bool isDecl = GV->isDeclarationForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. 
@@ -415,33 +381,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, } unsigned ARMSubtarget::getMispredictionPenalty() const { - return SchedModel->MispredictPenalty; + return SchedModel.MispredictPenalty; } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().getOS() == Triple::IOS && - !getTargetTriple().isOSVersionLT(7, 0); + return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); } -// Enable the PostMachineScheduler if the target selects it instead of -// PostRAScheduler. Currently only available on the command line via -// -misched-postra. +// This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostMachineScheduler() const { - return PostRAScheduler; + return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpandLoadLinked() const { +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } -bool ARMSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_NONE; - return PostRAScheduler && OptLevel >= CodeGenOpt::Default; -} - bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8f6c165..d5ee009 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -11,20 +11,18 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSUBTARGET_H -#define ARMSUBTARGET_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H +#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" 
#include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -44,7 +42,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexR5, Swift, CortexA53, CortexA57, Krait + CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -105,9 +103,6 @@ protected: /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; - /// PostRAScheduler - True if using post-register-allocation scheduler. - bool PostRAScheduler; - /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; @@ -191,7 +186,7 @@ protected: /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see - /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). + /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). bool AllowsUnalignedMem; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT @@ -225,7 +220,7 @@ protected: Triple TargetTriple; /// SchedModel - Processor specific instruction costs. - const MCSchedModel *SchedModel; + MCSchedModel SchedModel; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -244,8 +239,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, bool IsLittle, - const TargetOptions &Options); + const std::string &FS, const TargetMachine &TM, bool IsLittle); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -256,27 +250,30 @@ protected: /// subtarget options. 
Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// \brief Reset the features for the ARM target. - void resetSubtargetFeatures(const MachineFunction *MF) override; - /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - const DataLayout *getDataLayout() const { return &DL; } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - ARMJITInfo *getJITInfo() { return &JITInfo; } - const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); } - const ARMTargetLowering *getTargetLowering() const { return &TLInfo; } - const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - const ARMBaseRegisterInfo *getRegisterInfo() const { + const DataLayout *getDataLayout() const override { return &DL; } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const ARMBaseInstrInfo *getInstrInfo() const override { + return InstrInfo.get(); + } + const ARMTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const ARMFrameLowering *getFrameLowering() const override { + return FrameLowering.get(); + } + const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; - ARMJITInfo JITInfo; // Either Thumb1InstrInfo or Thumb2InstrInfo. 
std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; @@ -284,7 +281,7 @@ private: std::unique_ptr<ARMFrameLowering> FrameLowering; void initializeEnvironment(); - void resetSubtargetFeatures(StringRef CPU, StringRef FS); + void initSubtargetFeatures(StringRef CPU, StringRef FS); public: void computeIssueWidth(); @@ -411,6 +408,10 @@ public: bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } + bool isV6M() const { + return isThumb1Only() && isMClass(); + } + bool isR9Reserved() const { return IsR9Reserved; } bool useMovt(const MachineFunction &MF) const; @@ -432,19 +433,16 @@ public: bool hasSinCos() const; /// True for some subtargets at > -O0. - bool enablePostMachineScheduler() const; - - /// enablePostRAScheduler - True at 'More' optimization. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const override; + bool enablePostMachineScheduler() const override; - // enableAtomicExpandLoadLinked - True if we need to expand our atomics. - bool enableAtomicExpandLoadLinked() const override; + // enableAtomicExpand- True if we need to expand our atomics. + bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraies based on subtarget + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. - const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -454,6 +452,7 @@ public: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. 
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index d85194b..88d6c5e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -13,7 +13,9 @@ #include "ARM.h" #include "ARMTargetMachine.h" #include "ARMFrameLowering.h" +#include "ARMTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -42,6 +44,13 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); } +static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) + return make_unique<TargetLoweringObjectFileMachO>(); + if (TT.isOSWindows()) + return make_unique<TargetLoweringObjectFileCOFF>(); + return make_unique<ARMElfTargetObjectFile>(); +} /// TargetMachine ctor - Create an ARM architecture model. /// @@ -51,7 +60,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, *this, isLittle, Options) { + TLOF(createTLOF(Triple(getTargetTriple()))), + Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) @@ -59,6 +69,46 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Subtarget.isTargetHardFloat() ? 
FloatABI::Hard : FloatABI::Soft; } +ARMBaseTargetMachine::~ARMBaseTargetMachine() {} + +const ARMSubtarget * +ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function before we can generate a subtarget. We also need to use + // it as a key for the subtarget since that can be the only difference + // between two functions. + Attribute SFAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float"); + bool SoftFloat = !SFAttr.hasAttribute(Attribute::None) + ? SFAttr.getValueAsString() == "true" + : Options.UseSoftFloat; + + auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true" + : "use-soft-float=false")]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); + } + return I.get(); +} + void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. 
This // allows the ARM pass to delegate to the target independent layer when @@ -158,7 +208,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + else + addPass(createAtomicExpandPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -244,10 +297,3 @@ bool ARMPassConfig::addPreEmitPass() { return true; } - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { - // Machine code emitter pass for ARM. - PM.add(createARMJITCodeEmitterPass(*this, JCE)); - return false; -} diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index b72b1df..fba0ec2 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMTARGETMACHINE_H -#define ARMTARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #include "ARMInstrInfo.h" #include "ARMSubtarget.h" @@ -23,7 +23,11 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: + std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; + bool isLittle; + mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; + public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -31,30 +35,10 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); + ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARMBaseRegisterInfo *getRegisterInfo() const override { - return 
getSubtargetImpl()->getRegisterInfo(); - } - const ARMTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } - const ARMBaseInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const ARMFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - const InstrItineraryData *getInstrItineraryData() const override { - return &getSubtargetImpl()->getInstrItineraryData(); - } - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } - ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + const ARMSubtarget *getSubtargetImpl(const Function &F) const override; /// \brief Register ARM analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; @@ -62,7 +46,9 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } }; /// ARMTargetMachine - ARM target machine. 
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c926421..98e8763 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index a2ace62..ec834e8 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -49,7 +49,7 @@ public: ARMTTI(const ARMBaseTargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { + TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializeARMTTIPass(*PassRegistry::getPassRegistry()); } @@ -104,7 +104,7 @@ public: return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxInterleaveFactor() const override { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; @@ -126,10 +126,11 @@ public: unsigned getAddressComputationCost(Type *Val, bool IsComplex) const override; - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info = OK_AnyValue, - OperandValueKind Op2Info = OK_AnyValue) const override; + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const override; @@ -389,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, 
ValTy->getScalarSizeInBits() <= 32) return 3; + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. + if ((Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement) && + ValTy->getVectorElementType()->isIntegerTy()) + return 3; + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } @@ -497,9 +505,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info, - OperandValueKind Op2Info) const { +unsigned ARMTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -555,8 +564,8 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (Idx != -1) return LT.first * CostTbl[Idx].Cost; - unsigned Cost = - TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + unsigned Cost = TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. 
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 095955b..55a5775 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -19,7 +19,6 @@ arm_codegen_SRC_FILES := \ ARMAsmPrinter.cpp \ ARMBaseInstrInfo.cpp \ ARMBaseRegisterInfo.cpp \ - ARMCodeEmitter.cpp \ ARMConstantIslandPass.cpp \ ARMConstantPoolValue.cpp \ ARMExpandPseudoInsts.cpp \ @@ -29,7 +28,6 @@ arm_codegen_SRC_FILES := \ ARMISelDAGToDAG.cpp \ ARMISelLowering.cpp \ ARMInstrInfo.cpp \ - ARMJITInfo.cpp \ ARMLoadStoreOptimizer.cpp \ ARMMCInstLower.cpp \ ARMMachineFunctionInfo.cpp \ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index b62706c..9cc89bd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -129,12 +129,13 @@ public: class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; - MCAsmParser &Parser; const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; ARMTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } @@ -173,20 +174,16 @@ class ARMAsmParser : public MCTargetAsmParser { ITState.CurPosition = ~0U; // Done with the IT block after this. 
} - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Note(L, Msg, Ranges); + return getParser().Note(L, Msg, Ranges); } bool Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Warning(L, Msg, Ranges); + return getParser().Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Error(L, Msg, Ranges); + return getParser().Error(L, Msg, Ranges); } int tryParseRegister(); @@ -265,9 +262,15 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasARM() const { return !(STI.getFeatureBits() & ARM::FeatureNoARM); } + bool hasThumb2DSP() const { + return STI.getFeatureBits() & ARM::FeatureDSPThumb2; + } + bool hasD16() const { + return STI.getFeatureBits() & ARM::FeatureD16; + } void SwitchMode() { - unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); + uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } bool isMClass() const { @@ -290,6 +293,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &); OperandMatchResultTy parseProcIFlagsOperand(OperandVector &); OperandMatchResultTy parseMSRMaskOperand(OperandVector &); + OperandMatchResultTy parseBankedRegOperand(OperandVector &); OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low, int High); OperandMatchResultTy parsePKHLSLImm(OperandVector &O) { @@ -329,10 +333,9 @@ public: }; - ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { + ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), 
STI(_STI), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. @@ -359,7 +362,7 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; void onLabelParsed(MCSymbol *Symbol) override; }; @@ -383,6 +386,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Memory, k_PostIndexRegister, k_MSRMask, + k_BankedReg, k_ProcIFlags, k_VectorIndex, k_Register, @@ -435,6 +439,10 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Val; }; + struct BankedRegOp { + unsigned Val; + }; + struct TokOp { const char *Data; unsigned Length; @@ -517,6 +525,7 @@ class ARMOperand : public MCParsedAsmOperand { struct ITMaskOp ITMask; struct IFlagsOp IFlags; struct MMaskOp MMask; + struct BankedRegOp BankedReg; struct TokOp Tok; struct RegOp Reg; struct VectorListOp VectorList; @@ -585,6 +594,9 @@ public: case k_MSRMask: MMask = o.MMask; break; + case k_BankedReg: + BankedReg = o.BankedReg; + break; case k_ProcIFlags: IFlags = o.IFlags; break; @@ -679,6 +691,11 @@ public: return MMask.Val; } + unsigned getBankedReg() const { + assert(Kind == k_BankedReg && "Invalid access!"); + return BankedReg.Val; + } + bool isCoprocNum() const { return Kind == k_CoprocNum; } bool isCoprocReg() const { return Kind == k_CoprocReg; } bool isCoprocOption() const { return Kind == k_CoprocOption; } @@ -1384,6 +1401,7 @@ public: } bool isMSRMask() const { return Kind == k_MSRMask; } + bool isBankedReg() const { return Kind == k_BankedReg; } bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. @@ -1601,9 +1619,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. 
if (!CE) return false; - int64_t Value = CE->getValue(); - // i16 value in the range [0,255] or [0x0100, 0xff00] - return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(Value); + } + + bool isNEONi16splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(~Value & 0xffff); } bool isNEONi32splat() const { @@ -1614,12 +1641,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; - int64_t Value = CE->getValue(); - // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. - return (Value >= 0 && Value < 256) || - (Value >= 0x0100 && Value <= 0xff00) || - (Value >= 0x010000 && Value <= 0xff0000) || - (Value >= 0x01000000 && Value <= 0xff000000); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(Value); + } + + bool isNEONi32splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(~Value); } bool isNEONByteReplicate(unsigned NumBytes) const { @@ -1655,6 +1688,7 @@ public: int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + // FIXME: This is probably wrong and a copy and paste from previous example return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00) || (Value >= 0x010000 && Value <= 0xff0000) || @@ -1670,6 +1704,7 @@ public: int64_t Value = ~CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. 
+ // FIXME: This is probably wrong and a copy and paste from previous example return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00) || (Value >= 0x010000 && Value <= 0xff0000) || @@ -2334,6 +2369,11 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask()))); } + void addBankedRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg()))); + } + void addProcIFlagsOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); @@ -2378,10 +2418,16 @@ public: // The immediate encodes the type of constant as well as the value. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256) - Value = (Value >> 8) | 0xa00; - else - Value |= 0x800; + Value = ARM_AM::encodeNEONi16splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2390,12 +2436,16 @@ public: // The immediate encodes the type of constant as well as the value. 
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256 && Value <= 0xff00) - Value = (Value >> 8) | 0x200; - else if (Value > 0xffff && Value <= 0xff0000) - Value = (Value >> 16) | 0x400; - else if (Value > 0xffffff) - Value = (Value >> 24) | 0x600; + Value = ARM_AM::encodeNEONi32splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi32splat(~Value); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2736,6 +2786,14 @@ public: Op->EndLoc = S; return Op; } + + static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_BankedReg); + Op->BankedReg.Val = Reg; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } }; } // end anonymous namespace. @@ -2769,6 +2827,9 @@ void ARMOperand::print(raw_ostream &OS) const { case k_MSRMask: OS << "<mask: " << getMSRMask() << ">"; break; + case k_BankedReg: + OS << "<banked reg: " << getBankedReg() << ">"; + break; case k_Immediate: getImm()->print(OS); break; @@ -2871,8 +2932,9 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - StartLoc = Parser.getTok().getLoc(); - EndLoc = Parser.getTok().getEndLoc(); + const AsmToken &Tok = getParser().getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); RegNo = tryParseRegister(); return (RegNo == (unsigned)-1); @@ -2883,6 +2945,7 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// returned. Otherwise return -1. 
/// int ARMAsmParser::tryParseRegister() { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; @@ -2924,6 +2987,10 @@ int ARMAsmParser::tryParseRegister() { return Entry->getValue(); } + // Some FPUs only have 16 D registers, so D16-D31 are invalid + if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31) + return -1; + Parser.Lex(); // Eat identifier token. return RegNum; @@ -2935,6 +3002,7 @@ int ARMAsmParser::tryParseRegister() { // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3037,6 +3105,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) @@ -3118,9 +3187,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; switch (Name[1]) { default: return -1; - // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) - case '0': return CoprocOp == 'p'? -1: 10; - case '1': return CoprocOp == 'p'? -1: 11; + // CP10 and CP11 are VFP/NEON and so vector instructions should be used. + // However, old cores (v5/v6) did use them in that way. + case '0': return 10; + case '1': return 11; case '2': return 12; case '3': return 13; case '4': return 14; @@ -3132,6 +3202,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { /// parseITCondCode - Try to parse a condition code for an IT instruction. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseITCondCode(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3169,6 +3240,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3177,6 +3249,9 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { int Num = MatchCoprocessorOperandName(Tok.getString(), 'p'); if (Num == -1) return MatchOperand_NoMatch; + // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions + if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11)) + return MatchOperand_NoMatch; Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateCoprocNum(Num, S)); @@ -3188,6 +3263,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3206,6 +3282,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { /// coproc_option : '{' imm0_255 '}' ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // If this isn't a '{', this isn't a coprocessor immediate operand. @@ -3283,6 +3360,7 @@ static unsigned getDRegFromQReg(unsigned QReg) { /// Parse a register list. 
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); @@ -3414,6 +3492,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { // Helper function to parse the lane index for vector lists. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { + MCAsmParser &Parser = getParser(); Index = 0; // Always return a defined index value. if (Parser.getTok().is(AsmToken::LBrac)) { Parser.Lex(); // Eat the '['. @@ -3465,6 +3544,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseVectorList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); VectorLaneTy LaneKind; unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); @@ -3716,6 +3796,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) { /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3787,6 +3868,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3838,6 +3920,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3872,6 +3955,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3892,9 +3976,6 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // should really only be allowed when writing a special register. Note // they get dropped in the MRS instruction reading a special register as // the SYSm field is only 8 bits. - // - // FIXME: the _g and _nzcvqg versions are only allowed if the processor - // includes the DSP extension but that is not checked. .Case("apsr", 0x800) .Case("apsr_nzcvq", 0x800) .Case("apsr_g", 0x400) @@ -3926,6 +4007,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { if (FlagsVal == ~0U) return MatchOperand_NoMatch; + if (!hasThumb2DSP() && (FlagsVal & 0x400)) + // The _g and _nzcvqg versions are only valid if the DSP extension is + // available. + return MatchOperand_NoMatch; + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; @@ -3998,9 +4084,67 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { return MatchOperand_Success; } +/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for +/// use in the MRS/MSR instructions added to support virtualization. 
+ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef RegName = Tok.getString(); + + // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM + // and bit 5 is R. + unsigned Encoding = StringSwitch<unsigned>(RegName.lower()) + .Case("r8_usr", 0x00) + .Case("r9_usr", 0x01) + .Case("r10_usr", 0x02) + .Case("r11_usr", 0x03) + .Case("r12_usr", 0x04) + .Case("sp_usr", 0x05) + .Case("lr_usr", 0x06) + .Case("r8_fiq", 0x08) + .Case("r9_fiq", 0x09) + .Case("r10_fiq", 0x0a) + .Case("r11_fiq", 0x0b) + .Case("r12_fiq", 0x0c) + .Case("sp_fiq", 0x0d) + .Case("lr_fiq", 0x0e) + .Case("lr_irq", 0x10) + .Case("sp_irq", 0x11) + .Case("lr_svc", 0x12) + .Case("sp_svc", 0x13) + .Case("lr_abt", 0x14) + .Case("sp_abt", 0x15) + .Case("lr_und", 0x16) + .Case("sp_und", 0x17) + .Case("lr_mon", 0x1c) + .Case("sp_mon", 0x1d) + .Case("elr_hyp", 0x1e) + .Case("sp_hyp", 0x1f) + .Case("spsr_fiq", 0x2e) + .Case("spsr_irq", 0x30) + .Case("spsr_svc", 0x32) + .Case("spsr_abt", 0x34) + .Case("spsr_und", 0x36) + .Case("spsr_mon", 0x3c) + .Case("spsr_hyp", 0x3e) + .Default(~0U); + + if (Encoding == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. 
+ Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S)); + return MatchOperand_Success; +} + ARMAsmParser::OperandMatchResultTy ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, int High) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(Parser.getTok().getLoc(), Op + " operand expected."); @@ -4048,6 +4192,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseSetEndImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4077,6 +4222,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { /// n == 32 encoded as n == 0. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseShifterImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4147,6 +4293,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) { /// ror #n 'n' in {0, 8, 16, 24} ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseRotImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) @@ -4193,6 +4340,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) { ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseBitfield(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. 
if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4269,6 +4417,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); bool haveEaten = false; @@ -4321,6 +4470,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); @@ -4458,6 +4608,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser::parseMemory(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -4649,6 +4800,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { /// return true if it parses a shift otherwise it returns false. bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, unsigned &Amount) { + MCAsmParser &Parser = getParser(); SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -4709,6 +4861,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseFPImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // Anything that can accept a floating point constant as an operand // needs to go through here, as the regular parseExpression is // integer only. 
@@ -4789,6 +4942,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); SMLoc S, E; // Check if the current operand has a custom associated parser, if so, try to @@ -4921,6 +5075,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:. bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { + MCAsmParser &Parser = getParser(); RefKind = ARMMCExpr::VK_ARM_None; // consume an optional '#' (GNU compatibility) @@ -5271,7 +5426,7 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, +static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features, unsigned VariantID); static bool RequiresVFPRegListValidation(StringRef Inst, @@ -5296,6 +5451,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst, /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // FIXME: Can this be done via tablegen in some fashion? bool RequireVFPRegisterListCheck; bool AcceptSinglePrecisionOnly; @@ -5309,7 +5465,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // The generic tblgen'erated code does this later, at the start of // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. 
- unsigned AvailableFeatures = getAvailableFeatures(); + uint64_t AvailableFeatures = getAvailableFeatures(); unsigned AssemblerDialect = getParser().getAssemblerDialect(); applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); @@ -5415,6 +5571,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(ProcessorIMod, getContext()), NameLoc, NameLoc)); + } else if (Mnemonic == "cps" && isMClass()) { + return Error(NameLoc, "instruction 'cps' requires effect for M-class"); } // Add the remaining tokens in the mnemonic. @@ -5546,6 +5704,48 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + // FIXME: We would really like to be able to tablegen'erate this. + if (isThumbOne() && Operands.size() == 6 && + (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); + ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); + + // If both registers are the same then remove one of them from + // the operand list. + if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { + // If 3rd operand (variable Op5) is a register and the instruction is adds/sub + // then do not transform as the backend already handles this instruction + // correctly. 
+ if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { + Operands.erase(Operands.begin() + 3); + if (Mnemonic == "add" && !CarrySetting) { + // Special case for 'add' (not 'adds') instruction must + // remove the CCOut operand as well. + Operands.erase(Operands.begin() + 1); + } + } + } + } + + // If instruction is 'add' and first two register operands + // use SP register, then remove one of the SP registers from + // the instruction. + // FIXME: We would really like to be able to tablegen'erate this. + if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { + Operands.erase(Operands.begin() + 2); + } + } + // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5728,6 +5928,48 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "source operands must be sequential"); return false; } + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STRH_PRE: + case ARM::STRH_POST: + case ARM::STRB_PRE_IMM: + case ARM::STRB_PRE_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: { + // Rt must be different from Rn. 
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "source register and base register can't be identical"); + return false; + } + case ARM::LDR_PRE_IMM: + case ARM::LDR_PRE_REG: + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: { + // Rt must be different from Rn. + const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "destination register and base register can't be identical"); + return false; + } case ARM::SBFX: case ARM::UBFX: { // Width must be in range [1, 32-lsb]. @@ -5764,7 +6006,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[3]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); - + if (listContainsReg(Inst, 3 + HasWritebackToken, ARM::SP)) + return Error(Operands[3 + HasWritebackToken]->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::LDMIA_UPD: @@ -5775,7 +6019,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // UNPREDICTABLE on v7 upwards. Goodness knows what they did before. 
if (!hasV7Ops()) break; - // Fallthrough + if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) + return Error(Operands.back()->getStartLoc(), + "writeback register not allowed in register list"); + break; + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: { + if (listContainsReg(Inst, 3, ARM::SP)) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); + break; + } case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: @@ -5783,6 +6039,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) return Error(Operands.back()->getStartLoc(), "writeback register not allowed in register list"); + + if (listContainsReg(Inst, 4, ARM::SP)) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::sysLDMIA_UPD: @@ -5851,6 +6111,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[4]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); + if (listContainsReg(Inst, 4, ARM::SP) && !inITBlock()) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::tADDrSP: { @@ -8010,7 +8273,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } // Some high-register supporting Thumb1 encodings only allow both registers // to be from r0-r7 when in Thumb2. 
- else if (Opc == ARM::tADDhirr && isThumbOne() && + else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() && isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) return Match_RequiresThumb2; @@ -8028,10 +8291,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { } } -static const char *getSubtargetFeatureName(unsigned Val); +static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -8085,7 +8348,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Special case the error message for the very common case where only // a single subtarget feature is missing (Thumb vs. ARM, e.g.). std::string Msg = "instruction requires:"; - unsigned Mask = 1; + uint64_t Mask = 1; for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { if (ErrorInfo & Mask) { Msg += " "; @@ -8097,7 +8360,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -8174,6 +8437,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; + bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") @@ -8225,7 +8489,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal == ".thumb_set") return parseDirectiveThumbSet(DirectiveID.getLoc()); - if (!IsMachO) { + if (!IsMachO && !IsCOFF) { if (IDVal == ".arch") return 
parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".cpu") @@ -8256,6 +8520,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { /// ::= .short expression [, expression]* /// ::= .word expression [, expression]* bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -8285,6 +8550,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8306,6 +8572,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8334,12 +8601,13 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); + MCAsmParser &Parser = getParser(); + const auto Format = getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; // Darwin asm has (optionally) function name after .thumb_func direction // ELF doesn't - if (isMachO) { + if (IsMachO) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::EndOfStatement)) { if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) { @@ -8356,7 +8624,8 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { } if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(L, "unexpected token in directive"); + Error(Parser.getTok().getLoc(), 
"unexpected token in directive"); + Parser.eatToEndOfStatement(); return false; } @@ -8367,6 +8636,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { /// parseDirectiveSyntax /// ::= .syntax unified | divided bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(L, "unexpected token in .syntax directive"); @@ -8398,6 +8668,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { /// parseDirectiveCode /// ::= .code 16 | 32 bool ARMAsmParser::parseDirectiveCode(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Integer)) { Error(L, "unexpected token in .code directive"); @@ -8442,6 +8713,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { /// parseDirectiveReq /// ::= name .req registername bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + MCAsmParser &Parser = getParser(); Parser.Lex(); // Eat the '.req' token. 
unsigned Reg; SMLoc SRegLoc, ERegLoc; @@ -8460,7 +8732,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Consume the EndOfStatement - if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) { + if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) { Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); return false; } @@ -8471,6 +8743,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// parseDirectiveUneq /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); Error(L, "unexpected input in .unreq directive."); @@ -8507,6 +8780,7 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { /// ::= .eabi_attribute int, int [, "str"] /// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { + MCAsmParser &Parser = getParser(); int64_t Tag; SMLoc TagLoc; TagLoc = Parser.getTok().getLoc(); @@ -8617,6 +8891,32 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } +// FIXME: This is duplicated in getARMFPUFeatures() in +// tools/clang/lib/Driver/Tools.cpp +static const struct { + const unsigned Fpu; + const uint64_t Enabled; + const uint64_t Disabled; +} Fpus[] = { + {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON}, + {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, + {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::FP_ARMV8, ARM::FeatureFPARMv8, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::NEON, ARM::FeatureNEON, 0}, + {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0}, + 
{ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON, + ARM::FeatureCrypto}, + {ARM::CRYPTO_NEON_FP_ARMV8, + ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0}, + {ARM::SOFTVFP, 0, 0}, +}; + /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -8632,6 +8932,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { return false; } + for (const auto &Fpu : Fpus) { + if (Fpu.Fpu != ID) + continue; + + // Need to toggle features that should be on but are off and that + // should off but are on. + uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) | + (Fpu.Disabled & STI.getFeatureBits()); + setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); + break; + } + getTargetStreamer().emitFPU(ID); return false; } @@ -8698,6 +9010,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { /// parseDirectivePersonality /// ::= .personality name bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonality(L); @@ -8761,6 +9074,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { /// parseDirectiveSetFP /// ::= .setfp fpreg, spreg [, offset] bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .setfp directive"); @@ -8838,6 +9152,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { /// parseDirective /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .pad directive"); @@ -8912,6 +9227,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] 
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { + MCAsmParser &Parser = getParser(); int Width; if (isThumb()) { @@ -9008,7 +9324,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { } if (!Section) { - getStreamer().InitSections(); + getStreamer().InitSections(false); Section = getStreamer().getCurrentSection().first; } @@ -9024,6 +9340,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { /// parseDirectivePersonalityIndex /// ::= .personalityindex index bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonalityIndex(L); @@ -9079,6 +9396,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { /// parseDirectiveUnwindRaw /// ::= .unwind_raw offset, opcode [, opcode...] bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .unwind_raw directives"); @@ -9160,6 +9478,8 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { /// parseDirectiveTLSDescSeq /// ::= .tlsdescseq tls-variable bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { TokError("expected variable after '.tlsdescseq' directive"); Parser.eatToEndOfStatement(); @@ -9184,6 +9504,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { /// parseDirectiveMovSP /// ::= .movsp reg [, #offset] bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .movsp directives"); @@ -9247,6 +9568,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// parseDirectiveObjectArch /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::Identifier)) { 
Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); @@ -9303,6 +9625,8 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { /// parseDirectiveThumbSet /// ::= .thumb_set name, value bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { + MCAsmParser &Parser = getParser(); + StringRef Name; if (Parser.parseIdentifier(Name)) { TokError("expected identifier after '.thumb_set'"); @@ -9349,8 +9673,8 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" -static const struct ExtMapEntry { - const char *Extension; +static const struct { + const char *Name; const unsigned ArchCheck; const uint64_t Features; } Extensions[] = { @@ -9381,46 +9705,47 @@ static const struct ExtMapEntry { /// parseDirectiveArchExtension /// ::= .arch_extension [no]feature bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); return false; } - StringRef Extension = Parser.getTok().getString(); + StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); getLexer().Lex(); bool EnableFeature = true; - if (Extension.startswith_lower("no")) { + if (Name.startswith_lower("no")) { EnableFeature = false; - Extension = Extension.substr(2); + Name = Name.substr(2); } - for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) { - if (Extensions[EI].Extension != Extension) + for (const auto &Extension : Extensions) { + if (Extension.Name != Name) continue; - unsigned FB = getAvailableFeatures(); - if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) { - Error(ExtLoc, "architectural extension '" + Extension + "' is not " + if (!Extension.Features) + report_fatal_error("unsupported architectural extension: " + Name); + + if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) { + 
Error(ExtLoc, "architectural extension '" + Name + "' is not " "allowed for the current base architecture"); return false; } - if (!Extensions[EI].Features) - report_fatal_error("unsupported architectural extension: " + Extension); - - if (EnableFeature) - FB |= ComputeAvailableFeatures(Extensions[EI].Features); - else - FB &= ~ComputeAvailableFeatures(Extensions[EI].Features); - - setAvailableFeatures(FB); + uint64_t ToggleFeatures = EnableFeature + ? (~STI.getFeatureBits() & Extension.Features) + : ( STI.getFeatureBits() & Extension.Features); + uint64_t Features = + ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); + setAvailableFeatures(Features); return false; } - Error(ExtLoc, "unknown architectural extension: " + Extension); + Error(ExtLoc, "unknown architectural extension: " + Name); Parser.eatToEndOfStatement(); return false; } diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 9b5fa75..2530640 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -2,8 +2,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) -tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) @@ -19,7 +18,6 @@ add_llvm_target(ARMCodeGen ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp - ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp @@ -29,7 +27,6 @@ add_llvm_target(ARMCodeGen ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp - ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp 
b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d4038d..ef65418 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -85,42 +84,34 @@ namespace { } namespace { -/// ARMDisassembler - ARM disassembler for all ARM platforms. +/// ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. - /// ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() { - } + ~ARMDisassembler() {} - /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; }; -/// ThumbDisassembler - Thumb disassembler for all Thumb platforms. +/// Thumb disassembler for all Thumb platforms. class ThumbDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. - /// ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() { - } + ~ThumbDisassembler() {} - /// getInstruction - See MCDisassembler. 
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; private: mutable ITStatus ITBlock; @@ -281,6 +272,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, @@ -413,103 +406,99 @@ static MCDisassembler *createThumbDisassembler(const Target &T, } DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; + ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert(!(STI.getFeatureBits() & ARM::ModeThumb) && - "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); + "Asked to disassemble an ARM instruction but Subtarget is in Thumb " + "mode!"); // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); + uint32_t Insn = + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); // Calling the auto-generated decoder function. 
- DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn, - Address, this, STI); - if (result != MCDisassembler::Fail) { + DecodeStatus Result = + decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. 
if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); @@ -681,55 +670,53 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { } DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, + ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; + raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert((STI.getFeatureBits() & ARM::ModeThumb) && "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); // We want to read exactly 2 bytes of data. 
- if (Region.readBytes(Address, 2, bytes) == -1) { + if (Bytes.size() < 2) { Size = 0; return MCDisassembler::Fail; } - uint16_t insn16 = (bytes[1] << 8) | bytes[0]; - DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0]; + DecodeStatus Result = + decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16, - Address, this, STI); - if (result) { + Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this, + STI); + if (Result) { Size = 2; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb216, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; // Nested IT blocks are UNPREDICTABLE. Must be checked before we add // the Thumb predicate. if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock()) - result = MCDisassembler::SoftFail; + Result = MCDisassembler::SoftFail; - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); // If we find an IT instruction, we need to parse its condition // code and mask operands so that we can apply them correctly @@ -741,115 +728,115 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ITBlock.setITState(Firstcond, Mask); } - return result; + return Result; } // We want to read exactly 4 bytes of data. 
- if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } - uint32_t insn32 = (bytes[3] << 8) | - (bytes[2] << 0) | - (bytes[1] << 24) | - (bytes[0] << 16); + uint32_t Insn32 = + (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16); MI.clear(); - result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; UpdateThumbVFPPredicate(MI); - return result; + return Result; } } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) 
== 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this, + STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { + if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) { MI.clear(); - uint32_t NEONLdStInsn = insn32; + uint32_t NEONLdStInsn = Insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 4) == 0xF) { + if (fieldFromInstruction(Insn32, 24, 4) == 0xF) { MI.clear(); - uint32_t NEONDataInsn = insn32; + uint32_t NEONDataInsn = Insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONDataInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, + Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - uint32_t NEONCryptoInsn = insn32; + uint32_t NEONCryptoInsn = Insn32; NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONCryptoInsn |= 0x12000000; // 
Set bits 28 and 25 - result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - uint32_t NEONv8Insn = insn32; + uint32_t NEONv8Insn = Insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 - result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } } @@ -1015,7 +1002,11 @@ static const uint16_t DPRDecoderTable[] = { static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasD16 = featureBits & ARM::FeatureD16; + + if (RegNo > 31 || (hasD16 && RegNo > 15)) return MCDisassembler::Fail; unsigned Register = DPRDecoderTable[RegNo]; @@ -2973,11 +2964,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, if (size == 0x3) { if (align == 0) return MCDisassembler::Fail; - size = 4; align = 16; } else { if (size == 2) { - size = 1 << size; align *= 8; } else { size = 1 << size; @@ -3267,6 +3256,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBs: @@ -3302,11 +3296,10 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, case 
ARM::t2LDRSHs: return MCDisassembler::Fail; case ARM::t2LDRHs: - // FIXME: this instruction is only available with MP extensions, - // this should be checked first but we don't have access to the - // feature bits here. Inst.setOpcode(ARM::t2PLDWs); break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2PLIs); default: break; } @@ -3314,8 +3307,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDs: - case ARM::t2PLDWs: + break; case ARM::t2PLIs: + if (!hasV7Ops) + return MCDisassembler::Fail; + break; + case ARM::t2PLDWs: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3341,6 +3340,12 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 8); imm |= (U << 8); imm |= (Rn << 9); + unsigned add = fieldFromInstruction(Insn, 9, 1); + + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; if (Rn == 15) { switch (Inst.getOpcode()) { @@ -3375,6 +3380,13 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2LDRSHi8: return MCDisassembler::Fail; + case ARM::t2LDRHi8: + if (!add) + Inst.setOpcode(ARM::t2PLDWi8); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2PLIi8); + break; default: break; } @@ -3382,9 +3394,15 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi8: + break; case ARM::t2PLIi8: - case ARM::t2PLDWi8: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi8: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3404,6 +3422,11 @@ static DecodeStatus 
DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 12); imm |= (Rn << 13); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = (featureBits & ARM::FeatureMP); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRi12: @@ -3438,7 +3461,10 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, case ARM::t2LDRSHi12: return MCDisassembler::Fail; case ARM::t2LDRHi12: - Inst.setOpcode(ARM::t2PLDi12); + Inst.setOpcode(ARM::t2PLDWi12); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2PLIi12); break; default: break; @@ -3447,9 +3473,15 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi12: - case ARM::t2PLDWi12: + break; case ARM::t2PLIi12: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi12: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3507,6 +3539,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, unsigned U = fieldFromInstruction(Insn, 23, 1); int imm = fieldFromInstruction(Insn, 0, 12); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rt == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBpci: @@ -3525,7 +3561,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, switch(Inst.getOpcode()) { case ARM::t2PLDpci: + break; case ARM::t2PLIpci: + if (!hasV7Ops) + return MCDisassembler::Fail; break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3974,7 +4013,85 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t 
Address, const void *Decoder) { - if (!Val) return MCDisassembler::Fail; + DecodeStatus S = MCDisassembler::Success; + uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + if (FeatureBits & ARM::FeatureMClass) { + unsigned ValLow = Val & 0xff; + + // Validate the SYSm value first. + switch (ValLow) { + case 0: // apsr + case 1: // iapsr + case 2: // eapsr + case 3: // xpsr + case 5: // ipsr + case 6: // epsr + case 7: // iepsr + case 8: // msp + case 9: // psp + case 16: // primask + case 20: // control + break; + case 17: // basepri + case 18: // basepri_max + case 19: // faultmask + if (!(FeatureBits & ARM::HasV7Ops)) + // Values basepri, basepri_max and faultmask are only valid for v7m. + return MCDisassembler::Fail; + break; + default: + return MCDisassembler::Fail; + } + + if (Inst.getOpcode() == ARM::t2MSR_M) { + unsigned Mask = fieldFromInstruction(Val, 10, 2); + if (!(FeatureBits & ARM::HasV7Ops)) { + // The ARMv6-M MSR bits {11-10} can be only 0b10, other values are + // unpredictable. + if (Mask != 2) + S = MCDisassembler::SoftFail; + } + else { + // The ARMv7-M architecture stores an additional 2-bit mask value in + // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and + // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if + // the NZCVQ bits should be moved by the instruction. Bit mask{0} + // indicates the move for the GE{3:0} bits, the mask{0} bit can be set + // only if the processor includes the DSP extension. 
+ if (Mask == 0 || (Mask != 2 && ValLow > 3) || + (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1))) + S = MCDisassembler::SoftFail; + } + } + } else { + // A/R class + if (Val == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); + return S; +} + +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + unsigned R = fieldFromInstruction(Val, 5, 1); + unsigned SysM = fieldFromInstruction(Val, 0, 5); + + // The table of encodings for these banked registers comes from B9.2.3 of the + // ARM ARM. There are patterns, but nothing regular enough to make this logic + // neater. So by fiat, these values are UNPREDICTABLE: + if (!R) { + if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 || + SysM == 0x1a || SysM == 0x1b) + return MCDisassembler::SoftFail; + } else { + if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 && + SysM != 0x16 && SysM != 0x1c && SysM != 0x1e) + return MCDisassembler::SoftFail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 52d8338..a64a8a9 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 228fb57..0570084 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -503,30 +503,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, // Addressing Mode #3 //===--------------------------------------------------------------------===// -void 
ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - O << markup("<mem:") << "["; - printRegName(O, MO1.getReg()); - O << "], " << markup(">"); - - if (MO2.getReg()) { - O << (char)ARM_AM::getAM3Op(MO3.getImm()); - printRegName(O, MO2.getReg()); - return; - } - - unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - O << markup("<imm:") - << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs - << markup(">"); -} - void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { @@ -568,13 +544,9 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, return; } - const MCOperand &MO3 = MI->getOperand(Op+2); - unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); - - if (IdxMode == ARMII::IndexModePost) { - printAM3PostIndexOp(MI, Op, O); - return; - } + assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) != + ARMII::IndexModePost && + "unexpected idxmode"); printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } @@ -807,52 +779,56 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; + uint64_t FeatureBits = getAvailableFeatures(); - if (getAvailableFeatures() & ARM::FeatureMClass) { + if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); unsigned Opcode = MI->getOpcode(); - // For reads of the special registers ignore the "mask encoding" bits - // which are only for writes. - if (Opcode == ARM::t2MRS_M) - SYSm &= 0xff; + + // For writes, handle extended mask bits if the DSP extension is present. 
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { + switch (SYSm) { + case 0x400: O << "apsr_g"; return; + case 0xc00: O << "apsr_nzcvqg"; return; + case 0x401: O << "iapsr_g"; return; + case 0xc01: O << "iapsr_nzcvqg"; return; + case 0x402: O << "eapsr_g"; return; + case 0xc02: O << "eapsr_nzcvqg"; return; + case 0x403: O << "xpsr_g"; return; + case 0xc03: O << "xpsr_nzcvqg"; return; + } + } + + // Handle the basic 8-bit mask. + SYSm &= 0xff; + + if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) { + // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an + // alias for MSR APSR_nzcvq. + switch (SYSm) { + case 0: O << "apsr_nzcvq"; return; + case 1: O << "iapsr_nzcvq"; return; + case 2: O << "eapsr_nzcvq"; return; + case 3: O << "xpsr_nzcvq"; return; + } + } + switch (SYSm) { default: llvm_unreachable("Unexpected mask value!"); - case 0: - case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 1: - case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 2: - case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 3: - case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; - case 5: - case 0x805: O << "ipsr"; return; - case 6: - case 0x806: O << "epsr"; return; - case 7: - case 0x807: O << "iepsr"; return; - case 8: - case 0x808: O << "msp"; return; - case 9: - case 0x809: O << "psp"; return; - case 0x10: - case 0x810: O << "primask"; return; - case 0x11: - case 0x811: O << "basepri"; return; - case 0x12: - case 0x812: O << "basepri_max"; return; - case 0x13: - case 0x813: O << "faultmask"; return; - case 
0x14: - case 0x814: O << "control"; return; + case 0: O << "apsr"; return; + case 1: O << "iapsr"; return; + case 2: O << "eapsr"; return; + case 3: O << "xpsr"; return; + case 5: O << "ipsr"; return; + case 6: O << "epsr"; return; + case 7: O << "iepsr"; return; + case 8: O << "msp"; return; + case 9: O << "psp"; return; + case 16: O << "primask"; return; + case 17: O << "basepri"; return; + case 18: O << "basepri_max"; return; + case 19: O << "faultmask"; return; + case 20: O << "control"; return; } } @@ -882,6 +858,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint32_t Banked = MI->getOperand(OpNum).getImm(); + uint32_t R = (Banked & 0x20) >> 5; + uint32_t SysM = Banked & 0x1f; + + // Nothing much we can do about this, the encodings are specified in B9.2.3 of + // the ARM ARM v7C, and are all over the shop. + if (R) { + O << "SPSR_"; + + switch(SysM) { + case 0x0e: O << "fiq"; return; + case 0x10: O << "irq"; return; + case 0x12: O << "svc"; return; + case 0x14: O << "abt"; return; + case 0x16: O << "und"; return; + case 0x1c: O << "mon"; return; + case 0x1e: O << "hyp"; return; + default: llvm_unreachable("Invalid banked SPSR register"); + } + } + + assert(!R && "should have dealt with SPSR regs"); + const char *RegNames[] = { + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", + "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", + "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", + "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" + }; + const char *Name = RegNames[SysM]; + assert(Name[0] && "invalid banked register operand"); + + O << Name; +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); diff --git 
a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f671fe4..09fd536 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTPRINTER_H -#define ARMINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H +#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -51,7 +51,6 @@ public: void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, @@ -117,6 +116,7 @@ public: void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b6c85c2..f0eed9b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H -#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H +#define 
LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -575,6 +575,53 @@ namespace ARM_AM { return Val; } + // Generic validation for single-byte immediate (0X00, 00X0, etc). + static inline bool isNEONBytesplat(unsigned Value, unsigned Size) { + assert(Size >= 1 && Size <= 4 && "Invalid size"); + unsigned count = 0; + for (unsigned i = 0; i < Size; ++i) { + if (Value & 0xff) count++; + Value >>= 8; + } + return count == 1; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi16splat(unsigned Value) { + if (Value > 0xffff) + return false; + // i16 value with set bits only in one byte X0 or 0X. + return Value == 0 || isNEONBytesplat(Value, 2); + } + + // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR + static inline unsigned encodeNEONi16splat(unsigned Value) { + assert(isNEONi16splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100) + Value = (Value >> 8) | 0xa00; + else + Value |= 0x800; + return Value; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi32splat(unsigned Value) { + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. + return Value == 0 || isNEONBytesplat(Value, 4); + } + + /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR. 
+ static inline unsigned encodeNEONi32splat(unsigned Value) { + assert(isNEONi32splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100 && Value <= 0xff00) + Value = (Value >> 8) | 0x200; + else if (Value > 0xffff && Value <= 0xff0000) + Value = (Value >> 16) | 0x400; + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + return Value; + } + AMSubMode getLoadStoreMultipleSubMode(int Opcode); //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/lib/Target/ARM/MCTargetDesc/ARMArchName.h index 34b9fc1..bc05673 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMArchName.h +++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMARCHNAME_H -#define ARMARCHNAME_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H namespace llvm { namespace ARM { @@ -24,4 +24,4 @@ enum ArchKind { } // namespace ARM } // namespace llvm -#endif // ARMARCHNAME_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 7acd9cc..0b2e3b0 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -9,6 +9,10 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAsmBackendDarwin.h" +#include "MCTargetDesc/ARMAsmBackendELF.h" +#include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/StringSwitch.h" @@ -35,164 +39,136 @@ namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: ARMELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, - /*HasRelocationAddend*/ false) {} + : 
MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} }; -class ARMAsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; - bool isThumbMode; // Currently emitting Thumb code. - bool IsLittleEndian; // Big or little endian. -public: - ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle) - : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), - isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} - - ~ARMAsmBackend() { - delete STI; - } - - unsigned getNumFixupKinds() const override { - return ARM::NumTargetFixupKinds; - } - - bool hasNOP() const { - return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; - } +const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. + // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + 
{"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. + {"fixup_arm_movt_hi16", 0, 20, 0}, + {"fixup_arm_movw_lo16", 0, 20, 0}, + {"fixup_t2_movt_hi16", 0, 20, 0}, + {"fixup_t2_movw_lo16", 0, 20, 0}, + }; + const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. 
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. 
+ {"fixup_arm_movt_hi16", 12, 20, 0}, + {"fixup_arm_movw_lo16", 12, 20, 0}, + {"fixup_t2_movt_hi16", 12, 20, 0}, + {"fixup_t2_movw_lo16", 12, 20, 0}, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; +} - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. -// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel 
}, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 0, 20, 0 }, -{ "fixup_arm_movw_lo16", 0, 20, 0 }, -{ "fixup_t2_movt_hi16", 0, 20, 0 }, -{ "fixup_t2_movw_lo16", 0, 20, 0 }, - }; - const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. -// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ 
"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 12, 20, 0 }, -{ "fixup_arm_movw_lo16", 12, 20, 0 }, -{ "fixup_t2_movt_hi16", 12, 20, 0 }, -{ "fixup_t2_movw_lo16", 12, 20, 0 }, - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; - } - - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; - - bool mayNeedRelaxation(const MCInst &Inst) const override; - - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override; - - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - - void handleAssemblerFlag(MCAssemblerFlag Flag) override { - switch (Flag) { - default: break; - case MCAF_Code16: - setIsThumb(true); - break; - case MCAF_Code32: - setIsThumb(false); - break; - } +void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: + break; + case MCAF_Code16: + setIsThumb(true); + break; + case 
MCAF_Code32: + setIsThumb(false); + break; } - - unsigned getPointerSize() const { return 4; } - bool isThumb() const { return isThumbMode; } - void setIsThumb(bool it) { isThumbMode = it; } - bool isLittle() const { return IsLittleEndian; } -}; +} } // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { - default: return Op; - case ARM::tBcc: return ARM::t2Bcc; - case ARM::tLDRpci: return ARM::t2LDRpci; - case ARM::tADR: return ARM::t2ADR; - case ARM::tB: return ARM::t2B; - case ARM::tCBZ: return ARM::tHINT; - case ARM::tCBNZ: return ARM::tHINT; + default: + return Op; + case ARM::tBcc: + return ARM::t2Bcc; + case ARM::tLDRpci: + return ARM::t2LDRpci; + case ARM::tADR: + return ARM::t2ADR; + case ARM::tB: + return ARM::t2B; + case ARM::tCBZ: + return ARM::tHINT; + case ARM::tCBNZ: + return ARM::tHINT; } } @@ -202,8 +178,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { @@ -265,7 +240,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.addOperand(MCOperand::CreateImm(14)); Res.addOperand(MCOperand::CreateReg(0)); return; - } + } // The rest of instructions we're relaxing have the same operands. // We just need to update to the proper opcode. 
@@ -276,11 +251,11 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { - const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding - : Thumb1_16bitNopEncoding; + const uint16_t nopEncoding = + hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) OW->Write16(nopEncoding); @@ -289,18 +264,26 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } // ARM mode - const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding - : ARMv4_NopEncoding; + const uint32_t nopEncoding = + hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(nopEncoding); // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? 
switch (Count % 4) { - default: break; // No leftover bytes to write - case 1: OW->Write8(0); break; - case 2: OW->Write16(0); break; - case 3: OW->Write16(0); OW->Write8(0xa0); break; + default: + break; // No leftover bytes to write + case 1: + OW->Write8(0); + break; + case 2: + OW->Write16(0); + break; + case 3: + OW->Write16(0); + OW->Write8(0xa0); + break; } return true; @@ -313,8 +296,7 @@ static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { uint32_t Swapped = (Value & 0xFFFF0000) >> 16; Swapped |= (Value & 0x0000FFFF) << 16; return Swapped; - } - else + } else return Value; } @@ -351,7 +333,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_arm_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; @@ -363,7 +345,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_t2_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_t2_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -379,7 +361,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. Value -= 4; - // FALLTHROUGH + // FALLTHROUGH case ARM::fixup_t2_ldst_pcrel_12: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value -= 4; @@ -438,7 +420,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. 
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) return 0; return 0xffffff & ((Value - 8) >> 2); @@ -447,17 +430,17 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value >>= 1; // Low bit is not encoded. uint32_t out = 0; - bool I = Value & 0x800000; + bool I = Value & 0x800000; bool J1 = Value & 0x400000; bool J2 = Value & 0x200000; J1 ^= I; J2 ^= I; - out |= I << 26; // S bit - out |= !J1 << 13; // J1 bit - out |= !J2 << 11; // J2 bit - out |= (Value & 0x1FF800) << 5; // imm6 field - out |= (Value & 0x0007FF); // imm11 field + out |= I << 26; // S bit + out |= !J1 << 13; // J1 bit + out |= !J2 << 11; // J2 bit + out |= (Value & 0x1FF800) << 5; // imm6 field + out |= (Value & 0x0007FF); // imm11 field return swapHalfWords(out, IsLittleEndian); } @@ -498,7 +481,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - (uint16_t)imm11Bits); + (uint16_t)imm11Bits); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { @@ -515,7 +498,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. 
uint32_t offset = (Value - 2) >> 2; - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) offset = 0; uint32_t signBit = (offset & 0x400000) >> 22; @@ -528,7 +512,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - ((uint16_t)imm10LBits) << 1); + ((uint16_t)imm10LBits) << 1); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: @@ -564,7 +548,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. - // Fall through. + // Fall through. case ARM::fixup_t2_pcrel_10: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value = Value - 4; @@ -735,7 +719,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); - if (!Value) return; // Doesn't change encoding. + if (!Value) + return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); @@ -757,80 +742,36 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } -namespace { -// FIXME: This should be in a separate file. 
-class ARMWinCOFFAsmBackend : public ARMAsmBackend { -public: - ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) - : ARMAsmBackend(T, Triple, true) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); - } -}; - -// FIXME: This should be in a separate file. -// ELF is an ELF of course... -class ELFARMAsmBackend : public ARMAsmBackend { -public: - uint8_t OSABI; - ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t OSABI, bool IsLittle) - : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMELFObjectWriter(OS, OSABI, isLittle()); - } -}; - -// FIXME: This should be in a separate file. -class DarwinARMAsmBackend : public ARMAsmBackend { -public: - const MachO::CPUSubTypeARM Subtype; - DarwinARMAsmBackend(const Target &T, const StringRef TT, - MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { - HasDataInCodeSupport = true; - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - MachO::CPU_TYPE_ARM, - Subtype); - } -}; - -} // end anonymous namespace - MCAsmBackend *llvm::createARMAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU, - bool isLittle) { + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU, bool isLittle) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); + default: + llvm_unreachable("unsupported object format"); case Triple::MachO: { MachO::CPUSubTypeARM CS = - StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", 
MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); - - return new DarwinARMAsmBackend(T, TT, CS); + StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); + + return new ARMAsmBackendDarwin(T, TT, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return new ARMWinCOFFAsmBackend(T, TT); + return new ARMAsmBackendWinCOFF(T, TT); case Triple::ELF: assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + return new ARMAsmBackendELF(T, TT, OSABI, isLittle); } } @@ -847,14 +788,13 @@ MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, } MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return createARMAsmBackend(T, MRI, TT, CPU, true); } MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return createARMAsmBackend(T, MRI, TT, CPU, false); } - diff 
--git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h new file mode 100644 index 0000000..f4f1082 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -0,0 +1,69 @@ +//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" + +using namespace llvm; + +namespace { + +class ARMAsmBackend : public MCAsmBackend { + const MCSubtargetInfo *STI; + bool isThumbMode; // Currently emitting Thumb code. + bool IsLittleEndian; // Big or little endian. +public: + ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle) + : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), + isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} + + ~ARMAsmBackend() override { delete STI; } + + unsigned getNumFixupKinds() const override { + return ARM::NumTargetFixupKinds; + } + + bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. 
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + bool mayNeedRelaxation(const MCInst &Inst) const override; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + + void handleAssemblerFlag(MCAssemblerFlag Flag) override; + + unsigned getPointerSize() const { return 4; } + bool isThumb() const { return isThumbMode; } + void setIsThumb(bool it) { isThumbMode = it; } + bool isLittle() const { return IsLittleEndian; } +}; +} // end anonymous namespace + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h new file mode 100644 index 0000000..3bd7ab7 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -0,0 +1,33 @@ +//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H + +#include "llvm/Support/MachO.h" + +using namespace llvm; + +namespace { +class ARMAsmBackendDarwin : public ARMAsmBackend { +public: + const MachO::CPUSubTypeARM Subtype; + ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st) + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + HasDataInCodeSupport = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, + Subtype); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h new file mode 100644 index 0000000..4efd325 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -0,0 +1,27 @@ +//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H + +using namespace llvm; +namespace { +class ARMAsmBackendELF : public ARMAsmBackend { +public: + uint8_t OSABI; + ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) + : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMELFObjectWriter(OS, OSABI, isLittle()); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h new file mode 100644 index 0000000..33be347 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -0,0 +1,26 @@ +//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H + +using namespace llvm; + +namespace { +class ARMAsmBackendWinCOFF : public ARMAsmBackend { +public: + ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) + : ARMAsmBackend(T, Triple, true) {} + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 1686d76..4289a73 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINFO_H -#define ARMBASEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H #include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 1c84263..f24b419 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,8 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(unsigned Type) const override; + bool needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const override; }; } @@ -48,7 +49,8 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -bool ARMELFObjectWriter::needsRelocateWithSymbol(unsigned Type) const { +bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const { // FIXME: This is extremelly conservative. 
This really needs to use a // whitelist with a clear explanation for why each realocation needs to // point to the symbol, not to the section. @@ -100,7 +102,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_ARM_PLT32; + Type = ELF::R_ARM_CALL; break; case MCSymbolRefExpr::VK_ARM_TLSCALL: Type = ELF::R_ARM_TLS_CALL; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 7b5d8b0..24ee537 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -848,6 +848,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so + // uses the FP_ARMV8_D16 build attribute. + case ARM::FPV5_D16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPARMv8B, + /* OverwriteExisting= */ false); + break; + case ARM::NEON: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, @@ -1339,10 +1347,9 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { return S; } - MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack, - bool IsThumb) { +MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more @@ -1352,8 +1359,6 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { if (RelaxAll) S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); return S; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h 
b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index bfd9e33..46ba571 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_ARMFIXUPKINDS_H -#define LLVM_ARM_ARMFIXUPKINDS_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 7a19208..1d82099 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -55,7 +55,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; - HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling @@ -103,7 +102,6 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { Code32Directive = ".code\t32"; PrivateGlobalPrefix = ".L"; - HasLEB128 = true; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::None; UseParensForSymbolVariant = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 51cfa0a..f1fef41 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARMTARGETASMINFO_H -#define LLVM_ARMTARGETASMINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H #include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index e545e3c..68d32b2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ 
-35,12 +35,6 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const { OS << ')'; } -bool -ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return false; -} - void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index c5c0b10..06bf6c9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCEXPR_H -#define ARMMCEXPR_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H #include "llvm/MC/MCExpr.h" @@ -58,8 +58,11 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; - void visitUsedExpr(MCStreamer &Streamer) const override; + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { + return false; + } + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 2b3855d..98190ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -84,93 +84,89 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { Triple triple(TT); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - unsigned Len = TT.size(); - unsigned Idx = 0; - - // FIXME: Enhance Triple helper class to extract ARM version. 
bool isThumb = triple.getArch() == Triple::thumb || triple.getArch() == Triple::thumbeb; - if (Len >= 5 && TT.substr(0, 4) == "armv") - Idx = 4; - else if (Len >= 7 && TT.substr(0, 6) == "armebv") - Idx = 6; - else if (Len >= 7 && TT.substr(0, 6) == "thumbv") - Idx = 6; - else if (Len >= 9 && TT.substr(0, 8) == "thumbebv") - Idx = 8; bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; - if (Idx) { - unsigned SubVer = TT[Idx]; - if (SubVer == '8') { - if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8"; - } else if (SubVer == '7') { - if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { - if (NoCPU) - // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+2 && TT[Idx+1] == 's') { - if (NoCPU) - // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS - // Swift - ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else { - // v7 CPUs have lots of different feature sets. If no CPU is specified, - // then assume v7a (e.g. cortex-a8) feature set. 
Otherwise, return - // the "minimum" feature set and use CPU string to figure out the exact - // features. - if (NoCPU) - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } - } else if (SubVer == '6') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchFeature = "+v6t2"; - else if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6m,+noarm,+mclass"; - else - ARMArchFeature = "+v6"; - } else - ARMArchFeature = "+v6"; - } else if (SubVer == '5') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchFeature = "+v5te"; - else - ARMArchFeature = "+v5t"; - } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't') - ARMArchFeature = "+v4t"; + switch (triple.getSubArch()) { + default: + llvm_unreachable("invalid sub-architecture for ARM"); + case Triple::ARMSubArch_v8: + if (NoCPU) + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8"; + break; + case Triple::ARMSubArch_v7m: + isThumb = true; + if (NoCPU) + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7em: + if (NoCPU) + // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, + // FeatureT2XtPk, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; + else + // Use CPU to figure out the exact features. 
+ ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7s: + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7: + // v7 CPUs have lots of different feature sets. If no CPU is specified, + // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return + // the "minimum" feature set and use CPU string to figure out the exact + // features. + if (NoCPU) + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v6t2: + ARMArchFeature = "+v6t2"; + break; + case Triple::ARMSubArch_v6m: + isThumb = true; + if (NoCPU) + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6m,+noarm,+mclass"; + else + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v6: + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v5te: + ARMArchFeature = "+v5te"; + break; + case Triple::ARMSubArch_v5: + ARMArchFeature = "+v5t"; + break; + case Triple::ARMSubArch_v4t: + ARMArchFeature = "+v4t"; + break; + case Triple::NoSubArch: + break; } if (isThumb) { @@ -221,31 +217,14 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI; - switch (TheTriple.getOS()) { - case llvm::Triple::Darwin: - case llvm::Triple::IOS: - case llvm::Triple::MacOSX: + if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new ARMMCAsmInfoDarwin(TT); - break; - case llvm::Triple::Win32: - switch (TheTriple.getEnvironment()) { - case llvm::Triple::Itanium: - MAI = new ARMCOFFMCAsmInfoGNU(); - break; - case llvm::Triple::MSVC: - MAI = new ARMCOFFMCAsmInfoMicrosoft(); - break; - default: - llvm_unreachable("invalid environment"); - } - 
break; - default: - if (TheTriple.isOSBinFormatMachO()) - MAI = new ARMMCAsmInfoDarwin(TT); - else - MAI = new ARMELFMCAsmInfo(TT); - break; - } + else if (TheTriple.isWindowsItaniumEnvironment()) + MAI = new ARMCOFFMCAsmInfoGNU(); + else if (TheTriple.isWindowsMSVCEnvironment()) + MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else + MAI = new ARMELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); @@ -269,11 +248,8 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, // This is duplicated code. Refactor this. static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { @@ -287,7 +263,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); case Triple::ELF: - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, TheTriple.getArch() == Triple::thumb); } } @@ -362,8 +338,10 @@ extern "C" void LLVMInitializeARMTargetMC() { // Register the MC codegen info. 
TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, + createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, + createARMMCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5326e56..a6c20d5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCTARGETDESC_H -#define ARMMCTARGETDESC_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H #include "llvm/Support/DataTypes.h" #include <string> diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 186776a..7da5003 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -428,7 +428,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) + if (!SD->getSymbol().isUndefined()) FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index ad3f1ca..8acd7af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -28,7 +28,7 @@ ARMTargetStreamer::~ARMTargetStreamer() {} // The constant pool handling is shared by all ARMTargetStreamer // implementations. const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { - return ConstantPools->addEntry(Streamer, Expr); + return ConstantPools->addEntry(Streamer, Expr, 4); } void ARMTargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index cd58759..e0c113e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_UNWIND_OP_ASM_H -#define ARM_UNWIND_OP_ASM_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ARMEHABI.h" @@ -90,4 +90,4 @@ private: } // namespace llvm -#endif // ARM_UNWIND_OP_ASM_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 2a7fe61..db8fc92 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDesc parent = ARM -required_libraries = ARMAsmPrinter ARMInfo MC Support +required_libraries = ARMAsmPrinter ARMInfo MC MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f6d24e9..35fe9b3 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -378,8 +378,8 @@ bool 
MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); isLikeA9 = STI->isLikeA9() || STI->isSwift(); diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index f069535..c1601a3 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -15,7 +15,7 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenCallingConv.inc \ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index baa97a7..6deab4f 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -52,9 +52,9 @@ void Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -89,12 +89,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction 
&MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && @@ -321,12 +324,15 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && @@ -382,28 +388,65 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (ArgRegsSaveSize) { - // Unlike T2 and ARM mode, the T1 pop 
instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. Therefore, we - // pop the old LR into R3 as a temporary. - + bool IsV4PopReturn = false; + for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + if (CSI.getReg() == ARM::LR) + IsV4PopReturn = true; + IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); + + // Unlike T2 and ARM mode, the T1 pop instruction cannot restore + // to LR, and we can't pop the value directly to the PC since + // we need to update the SP after popping the value. So instead + // we have to emit: + // POP {r3} + // ADD sp, #offset + // BX r3 + // If this would clobber a return value, then generate this sequence instead: + // MOV ip, r3 + // POP {r3} + // ADD sp, #offset + // MOV lr, r3 + // MOV r3, ip + // BX lr + if (ArgRegsSaveSize || IsV4PopReturn) { // Get the last instruction, tBX_RET MBBI = MBB.getLastNonDebugInstr(); assert (MBBI->getOpcode() == ARM::tBX_RET); - // Epilogue for vararg functions: pop LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); + DebugLoc dl = MBBI->getDebugLoc(); + + if (AFI->getReturnRegsCount() <= 3) { + // Epilogue: pop saved LR to R3 and branch off it. 
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R3, RegState::Define) + .addReg(ARM::R12, RegState::Kill)); + // Keep the tBX_RET instruction + } } } @@ -417,7 +460,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -456,7 +499,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); @@ -470,6 +513,9 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Special epilogue for vararg functions. 
See emitEpilogue if (isVarArg) continue; + // ARMv4T requires BX, see emitEpilogue + if (STI.hasV4TOps() && !STI.hasV5TOps()) + continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); MIB.copyImplicitOps(&*MI); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index a227f8e..b785b28 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H -#define LLVM_ARM_THUMB1FRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H #include "ARMFrameLowering.h" #include "Thumb1InstrInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 68cbb5c..8ea912e 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + // Need to check the arch. 
+ MachineFunction &MF = *MBB.getParent(); + const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>(); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); + + if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) + || !ARM::tGPRRegClass.contains(DestReg)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); + else { + // FIXME: The performance consequences of this are going to be atrocious. + // Some things to try that should be better: + // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 + // * 'movs $dst, $src' if cpsr isn't live + // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html + + // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) + .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) + .addReg(DestReg, getDefRegState(true)); + } } void Thumb1InstrInfo:: @@ -101,3 +122,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } + +void +Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM); + else + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM); +} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c5845b7..9fba760 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1INSTRUCTIONINFO_H -#define THUMB1INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" @@ -54,7 +54,10 @@ 
public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } -#endif // THUMB1INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f907b14..c10c809 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -66,8 +66,12 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + assert((isARMLowRegister(DestReg) || + isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); + MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); @@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, NumBytes = -NumBytes; } unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { + if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); + if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); - } - if (NumBytes <= 255 && NumBytes >= 0) + if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); - else if (NumBytes < 0 && NumBytes >= -255) { + } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) @@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, ARMCC::AL, 
0, MIFlags); // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr + : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) @@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, AddDefaultPred(MIB); } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. +/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or +/// SUBs first, and uses a constant pool value if the instruction sequence would +/// be too long. This is allowed to modify the condition flags. 
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, @@ -172,151 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; + + int CopyOpc = 0; + unsigned CopyBits = 0; + unsigned CopyScale = 1; + bool CopyNeedsCC = false; int ExtraOpc = 0; - bool NeedCC = false; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; + unsigned ExtraBits = 0; + unsigned ExtraScale = 1; + bool ExtraNeedsCC = false; + + // Strategy: + // We need to select two types of instruction, maximizing the available + // immediate range of each. The instructions we use will depend on whether + // DestReg and BaseReg are low, high or the stack pointer. + // * CopyOpc - DestReg = BaseReg + imm + // This will be emitted once if DestReg != BaseReg, and never if + // DestReg == BaseReg. + // * ExtraOpc - DestReg = DestReg + imm + // This will be emitted as many times as necessary to add the + // full immediate. + // If the immediate ranges of these instructions are not large enough to cover + // NumBytes with a reasonable number of instructions, we fall back to using a + // value loaded from a constant pool. 
+ if (DestReg == ARM::SP) { + if (BaseReg == ARM::SP) { + // sp -> sp + // Already in right reg, no copy needed + } else { + // low -> sp or high -> sp + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - if (DestReg == ARM::SP) { - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; + ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi; + ExtraBits = 7; + ExtraScale = 4; + } else if (isARMLowRegister(DestReg)) { + if (BaseReg == ARM::SP) { + // sp -> low + assert(!isSub && "Thumb1 does not have tSUBrSPi"); + CopyOpc = ARM::tADDrSPi; + CopyBits = 8; + CopyScale = 4; + } else if (DestReg == BaseReg) { + // low -> same low + // Already in right reg, no copy needed + } else if (isARMLowRegister(BaseReg)) { + // low -> different low + CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3; + CopyBits = 3; + CopyNeedsCC = true; } else { - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NumBits = 8; - NeedCC = true; + // high -> low + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - isTwoAddr = true; + ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + ExtraBits = 8; + ExtraNeedsCC = true; + } else /* DestReg is high */ { + if (DestReg == BaseReg) { + // high -> same high + // Already in right reg, no copy needed + } else { + // {low,high,sp} -> high + CopyOpc = ARM::tMOVr; + CopyBits = 0; + } + ExtraOpc = 0; } - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + // We could handle an unaligned immediate with an unaligned copy instruction + // and an aligned extra instruction, but this case is not currently needed. 
+ assert(((Bytes & 3) == 0 || ExtraScale == 1) && + "Unaligned offset, but all instructions require alignment"); + + unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale; + // If we would emit the copy with an immediate of 0, just use tMOVr. + if (CopyOpc && Bytes < CopyScale) { + CopyOpc = ARM::tMOVr; + CopyBits = 0; + CopyScale = 1; + CopyNeedsCC = false; + CopyRange = 0; + } + unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction + unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0; + unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange); + + // We could handle this case when the copy instruction does not require an + // aligned immediate, but we do not currently do this. + assert(RangeAfterCopy % ExtraScale == 0 && + "Extra instruction requires immediate to be aligned"); + + unsigned RequiredExtraInstrs; + if (ExtraRange) + RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange; + else if (RangeAfterCopy > 0) + // We need an extra instruction but none is available + RequiredExtraInstrs = 1000000; + else + RequiredExtraInstrs = 0; + unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs; unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. + + // Use a constant pool, if the sequence of ADDs/SUBs is too expensive. + if (RequiredInstrs > Threshold) { emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } - if (DstNotEqBase) { - if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - const MCInstrDesc &MCID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); - const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) - .setMIFlags(MIFlags)); - AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill)) - .setMIFlags(MIFlags); + // Emit zero or one copy instructions + if (CopyOpc) { + unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale; + Bytes -= CopyImm * CopyScale; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); + if (CopyNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg, RegState::Kill); + if (CopyOpc != ARM::tMOVr) { + MIB.addImm(CopyImm); } + AddDefaultPred(MIB.setMIFlags(MIFlags)); + BaseReg = DestReg; } - unsigned Chunk = ((1 << NumBits) - 1) * Scale; + // Emit zero or more in-place add/sub instructions while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(DestReg).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - } else { - bool isKill = BaseReg != ARM::SP; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - - BaseReg = DestReg; - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? 
ARM::tSUBi8 : ARM::tADDi8; - NeedCC = isTwoAddr = true; - } - } - } - - if (ExtraOpc) { - const MCInstrDesc &MCID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3) - .setMIFlags(MIFlags)); - } -} + unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale; + Bytes -= ExtraImm * ExtraScale; -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), - DestReg)) - .addImm(ThisVal)); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); - if (isSub) { - const MCInstrDesc &MCID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill)); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); + if (ExtraNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg).addImm(ExtraImm); + MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } } @@ -352,86 +330,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (Opcode == ARM::tADDrSPi) { + if (Opcode == ARM::tADDframe) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. 
- unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - unsigned PredReg; - if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset - MI.RemoveOperand(FrameRegIdx+1); - return true; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) - .addImm(Offset / Scale)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); - } - return true; - } - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. 
- if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, - *this); - MBB.erase(II); - return true; - } - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); - } - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = std::next(II); - emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, - *this); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructions) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - } + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); + MBB.erase(II); return true; } else { if (AddrMode != ARMII::AddrModeT1_s) @@ -485,8 +390,11 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>( - MI.getParent()->getParent()->getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MI.getParent() + ->getParent() + ->getTarget() + .getSubtargetImpl() + ->getInstrInfo()); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -512,7 +420,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be 
negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. - const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); DebugLoc DL; AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) .addReg(ARM::R12, RegState::Define) @@ -559,7 +467,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); @@ -570,7 +478,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo()->getStackSize() + SPAdj; if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) && "Unexpected"); // There are alloca()'s in this function, must reference off the frame // pointer or base pointer instead. @@ -587,7 +495,10 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // when !hasReservedCallFrame(). 
#ifndef NDEBUG if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ - assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + assert(MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); assert(!MF.getFrameInfo()->hasVarSizedObjects() && diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 0c0abbe..5feaf52 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1REGISTERINFO_H -#define THUMB1REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H #include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -60,4 +60,4 @@ public: }; } -#endif // THUMB1REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index edb9ff3..fdcb522 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -188,7 +188,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { true/*isImp*/, false/*isKill*/)); MachineInstr *LastITMI = MI; - MachineBasicBlock::iterator InsertPos = MIB; + MachineBasicBlock::iterator InsertPos = MIB.getInstr(); ++MBBI; // Form IT block. 
@@ -255,8 +255,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT(); if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index a9df006..91973e1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -209,6 +209,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +void +Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 34d45d3..46a1f6d 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2INSTRUCTIONINFO_H -#define THUMB2INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" @@ -61,6 +61,10 @@ public: /// always be able to get register info as well (through this method). 
/// const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical @@ -71,4 +75,4 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg); } -#endif // THUMB2INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 782d81f..0d5d85a 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -40,7 +40,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 8a33e6c..1dd94cc 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2REGISTERINFO_H -#define THUMB2REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H #include "ARMBaseRegisterInfo.h" @@ -35,4 +35,4 @@ public: }; } -#endif // THUMB2REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 09debe7..c51eb8b 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -335,7 +335,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == 
ARM::t2LDMIA || Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || Opc == ARM::t2LDMDB_UPD); - bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); + bool isLROk = (Opc == ARM::t2STMDB_UPD); bool isSPOk = isPCOk || isLROk; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -384,7 +384,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; - HasOffReg = false; } Scale = 4; @@ -1003,7 +1002,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); STI = &TM.getSubtarget<ARMSubtarget>(); // Optimizing / minimizing size? |