43 files changed, 1270 insertions, 1068 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 94faf6f..92eaf9e 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
       return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
   }
 
-  assert(0 && "Unhandled update pattern!");
-  return 0;
+  llvm_unreachable("Unhandled update pattern!");
 }
 
 // Return true if this MachineInstr inserts a scalar (SPR) value into
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 55e9fe5..28d2610 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -82,7 +82,8 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
   const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
   assert(GV && "C++ constructor pointer was not a GlobalValue!");
 
-  const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
+  const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV,
+                                                           ARMII::MO_NO_FLAG),
                                             (Subtarget->isTargetELF()
                                              ? MCSymbolRefExpr::VK_ARM_TARGET1
                                              : MCSymbolRefExpr::VK_None),
@@ -164,7 +165,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
              (TF & ARMII::MO_HI16))
       O << ":upper16:";
-    O << *getSymbol(GV);
+    O << *GetARMGVSymbol(GV, TF);
 
     printOffset(MO.getOffset(), O);
     if (TF == ARMII::MO_PLT)
@@ -730,6 +731,32 @@ void ARMAsmPrinter::emitAttributes() {
   if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops())
       ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
 
+  if (MMI) {
+    if (const Module *SourceModule = MMI->getModule()) {
+      // ABI_PCS_wchar_t to indicate wchar_t width
+      // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+      if (auto WCharWidthValue = cast_or_null<ConstantInt>(
+              SourceModule->getModuleFlag("wchar_size"))) {
+        int WCharWidth = WCharWidthValue->getZExtValue();
+        assert((WCharWidth == 2 || WCharWidth == 4) &&
+               "wchar_t width must be 2 or 4 bytes");
+        ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+      }
+
+      // ABI_enum_size to indicate enum width
+      // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+      //        (all enums contain a value needing 32 bits to encode).
+      if (auto EnumWidthValue = cast_or_null<ConstantInt>(
+              SourceModule->getModuleFlag("min_enum_size"))) {
+        int EnumWidth = EnumWidthValue->getZExtValue();
+        assert((EnumWidth == 1 || EnumWidth == 4) &&
+               "Minimum enum width must be 1 or 4 bytes");
+        int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+        ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+      }
+    }
+  }
+
   if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
       ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
                         ARMBuildAttrs::AllowTZVirtualization);
@@ -768,23 +795,41 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
 
 MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
                                         unsigned char TargetFlags) {
-  bool isIndirect = Subtarget->isTargetMachO() &&
-    (TargetFlags & ARMII::MO_NONLAZY) &&
-    Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
-  if (!isIndirect)
-    return getSymbol(GV);
+  if (Subtarget->isTargetMachO()) {
+    bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) &&
+      Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+
+    if (!IsIndirect)
+      return getSymbol(GV);
 
-  // FIXME: Remove this when Darwin transition to @GOT like syntax.
-  MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-  MachineModuleInfoMachO &MMIMachO =
-    MMI->getObjFileInfo<MachineModuleInfoMachO>();
-  MachineModuleInfoImpl::StubValueTy &StubSym =
-    GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
-    MMIMachO.getGVStubEntry(MCSym);
-  if (!StubSym.getPointer())
-    StubSym = MachineModuleInfoImpl::
-      StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
-  return MCSym;
+    // FIXME: Remove this when Darwin transition to @GOT like syntax.
+    MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+    MachineModuleInfoMachO &MMIMachO =
+      MMI->getObjFileInfo<MachineModuleInfoMachO>();
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym)
+                                : MMIMachO.getGVStubEntry(MCSym);
+    if (!StubSym.getPointer())
+      StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+                                                   !GV->hasInternalLinkage());
+    return MCSym;
+  } else if (Subtarget->isTargetCOFF()) {
+    assert(Subtarget->isTargetWindows() &&
+           "Windows is the only supported COFF target");
+
+    bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+    if (!IsIndirect)
+      return getSymbol(GV);
+
+    SmallString<128> Name;
+    Name = "__imp_";
+    getNameWithPrefix(Name, GV);
+
+    return OutContext.GetOrCreateSymbol(Name);
+  } else if (Subtarget->isTargetELF()) {
+    return getSymbol(GV);
+  }
+  llvm_unreachable("unexpected target");
 }
 
 void ARMAsmPrinter::
@@ -928,7 +973,7 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
   for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
     MachineBasicBlock *MBB = JTBBs[i];
     const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
-                                                      OutContext);
+                                                          OutContext);
     // If this isn't a TBB or TBH, the entries are direct branch instructions.
     if (OffsetWidth == 4) {
       EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B)
@@ -1225,8 +1270,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       // Add 's' bit operand (always reg0 for this)
       .addReg(0));
 
-    const GlobalValue *GV = MI->getOperand(0).getGlobal();
-    MCSymbol *GVSym = getSymbol(GV);
+    const MachineOperand &Op = MI->getOperand(0);
+    const GlobalValue *GV = Op.getGlobal();
+    const unsigned TF = Op.getTargetFlags();
+    MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
     const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
     EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc)
       .addExpr(GVSymExpr)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bc266e8..0288db9 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -102,14 +103,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
 
 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
 // currently defaults to no prepass hazard recognizer.
-ScheduleHazardRecognizer *ARMBaseInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
-                             const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+                                               const ScheduleDAG *DAG) const {
   if (usePreRAHazardRecognizer()) {
-    const InstrItineraryData *II = TM->getInstrItineraryData();
+    const InstrItineraryData *II =
+        &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
     return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
   }
-  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
 }
 
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -1885,7 +1887,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!Subtarget.isMinSize())
+  if (!MF.getFunction()->getAttributes().hasAttribute(
+          AttributeSet::FunctionIndex, Attribute::MinSize))
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -4358,6 +4361,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
   MI->addRegisterKilled(DReg, TRI, true);
 }
 
+void ARMBaseInstrInfo::getUnconditionalBranch(
+    MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+  if (Subtarget.isThumb2()) // Check first: isThumb() is also true in Thumb2.
+    Branch.setOpcode(ARM::t2B);
+  else if (Subtarget.isThumb())
+    Branch.setOpcode(ARM::tB);
+  else
+    Branch.setOpcode(ARM::Bcc);
+
+  Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+  Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
+  Branch.addOperand(MCOperand::CreateReg(0));
+}
+
+void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
+  // Thumb wins over the NaCl trap; the plain TRAP opcode is the fallback.
+  const unsigned TrapOpc =
+      Subtarget.isThumb() ? ARM::tTRAP
+                          : Subtarget.useNaClTrap() ? ARM::TRAPNaCl
+                                                    : ARM::TRAP;
+  MI.setOpcode(TrapOpc);
+}
+
 bool ARMBaseInstrInfo::hasNOP() const {
   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
 }
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 4b3e740..b8d6758 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -50,7 +50,7 @@ public:
   const ARMSubtarget &getSubtarget() const { return Subtarget; }
 
   ScheduleHazardRecognizer *
-  CreateTargetHazardRecognizer(const TargetMachine *TM,
+  CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                const ScheduleDAG *DAG) const override;
 
   ScheduleHazardRecognizer *
@@ -229,6 +229,13 @@ public:
                                       const TargetRegisterInfo*) const override;
   void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
                                  const TargetRegisterInfo *TRI) const override;
+
+  void
+  getUnconditionalBranch(MCInst &Branch,
+                         const MCSymbolRefExpr *BranchTarget) const override;
+
+  void getTrap(MCInst &MI) const override;
+
   /// Get the number of addresses by LDM or VLDM or zero for unknown.
   unsigned getNumLDMAddresses(const MachineInstr *MI) const;
 
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a2eee9f..cdd91c7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,9 +45,12 @@ using namespace llvm;
 
 ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
     : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
-  if (STI.isTargetMachO())
-    FramePtr = ARM::R7;
-  else if (STI.isTargetWindows())
+  if (STI.isTargetMachO()) {
+    if (STI.isTargetDarwin() || STI.isThumb1Only())
+      FramePtr = ARM::R7;
+    else
+      FramePtr = ARM::R11;
+  } else if (STI.isTargetWindows())
     FramePtr = ARM::R11;
   else // ARM EABI
     FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 2fd7edd..5fb6ebf 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,6 +15,7 @@
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
 #include "ARMRelocations.h"
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 6045738..51d3dbb 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -927,10 +927,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     }
     case ARM::tTPsoft:
     case ARM::TPsoft: {
-      MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
-        .addExternalSymbol("__aeabi_read_tp", 0);
+      MachineInstrBuilder MIB;
+      if (Opcode == ARM::tTPsoft)
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get( ARM::tBL))
+              .addImm((unsigned)ARMCC::AL).addReg(0)
+              .addExternalSymbol("__aeabi_read_tp", 0);
+      else
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get( ARM::BL))
+              .addExternalSymbol("__aeabi_read_tp", 0);
 
       MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       TransferImpOps(MI, MIB, MIB);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6f8fb1a..e2d90cd 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -590,7 +590,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
 
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt() &&
+  if (Subtarget->useMovt(*FuncInfo.MF) &&
       (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
     unsigned Opc;
     unsigned char TF = 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0caf4bf..a67b360 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -39,6 +39,10 @@ static MachineBasicBlock::iterator
 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                         unsigned NumAlignedDPRCS2Regs);
 
+ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
+    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+      STI(sti) {}
+
 /// hasFP - Return true if the specified function should have a dedicated frame
 /// pointer register.  This is true if the function has variable sized allocas
 /// or if frame pointer elimination is disabled.
@@ -220,7 +224,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
     case ARM::R10:
     case ARM::R11:
     case ARM::R12:
-      if (STI.isTargetMachO()) {
+      if (STI.isTargetDarwin()) {
         GPRCS2Size += 4;
         break;
       }
@@ -380,7 +384,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.isTargetMachO())
+        if (STI.isTargetDarwin())
           break;
         // fallthrough
       case ARM::R0:
@@ -445,7 +449,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.isTargetMachO()) {
+        if (STI.isTargetDarwin()) {
           unsigned DwarfReg =  MRI->getDwarfRegNum(Reg, true);
           unsigned Offset = MFI->getObjectOffset(FI);
           unsigned CFIIndex = MMI.addFrameInst(
@@ -810,7 +814,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     unsigned LastReg = 0;
     for (; i != 0; --i) {
       unsigned Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.isTargetMachO())) continue;
+      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
 
       // D-registers in the aligned area DPRCS2 are NOT spilled here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -888,7 +892,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     bool DeleteRet = false;
     for (; i != 0; --i) {
       unsigned Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.isTargetMachO())) continue;
+      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
 
       // The aligned reloads from area DPRCS2 are not inserted here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1438,7 +1442,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     if (Spilled) {
       NumGPRSpills++;
 
-      if (!STI.isTargetMachO()) {
+      if (!STI.isTargetDarwin()) {
         if (Reg == ARM::LR)
           LRSpilled = true;
         CS1Spilled = true;
@@ -1460,7 +1464,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
         break;
       }
     } else {
-      if (!STI.isTargetMachO()) {
+      if (!STI.isTargetDarwin()) {
         UnspilledCS1GPRs.push_back(Reg);
         continue;
       }
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 981d320..709afbc 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -14,7 +14,6 @@
 #ifndef ARM_FRAMEINFO_H
 #define ARM_FRAMEINFO_H
 
-#include "ARMSubtarget.h"
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
@@ -25,10 +24,7 @@ protected:
   const ARMSubtarget &STI;
 
 public:
-  explicit ARMFrameLowering(const ARMSubtarget &sti)
-    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
-      STI(sti) {
-  }
+  explicit ARMFrameLowering(const ARMSubtarget &sti);
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 08d598d..38547cf 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -60,22 +60,17 @@ enum AddrMode2Type {
 };
 
 class ARMDAGToDAGISel : public SelectionDAGISel {
-  ARMBaseTargetMachine &TM;
-
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
   /// make the right decision when generating code for different targets.
   const ARMSubtarget *Subtarget;
 
 public:
-  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
-                           CodeGenOpt::Level OptLevel)
-    : SelectionDAGISel(tm, OptLevel), TM(tm),
-      Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
-  }
+  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(tm, OptLevel) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override {
     // Reset the subtarget each time through.
-    Subtarget = &TM.getSubtarget<ARMSubtarget>();
+    Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>();
     SelectionDAGISel::runOnMachineFunction(MF);
     return true;
   }
@@ -429,8 +424,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
   if (Use->getOpcode() == ISD::CopyToReg)
     return true;
   if (Use->isMachineOpcode()) {
-    const ARMBaseInstrInfo *TII =
-      static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+        CurDAG->getTarget().getInstrInfo());
 
     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
     if (MCID.mayStore())
@@ -2444,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   case ISD::Constant: {
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
     bool UseCP = true;
-    if (Subtarget->useMovt())
+    if (Subtarget->useMovt(*MF))
       // Thumb2-aware targets have the MOVT instruction, so all immediates can
       // be done with MOV + MOVT, at worst.
       UseCP = false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 00d07e8..4bfa5a8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -155,16 +155,16 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 }
 
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
-  if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+  if (TT.isOSBinFormatMachO())
     return new TargetLoweringObjectFileMachO();
-  if (TM.getSubtarget<ARMSubtarget>().isTargetWindows())
+  if (TT.isOSWindows())
     return new TargetLoweringObjectFileCOFF();
   return new ARMElfTargetObjectFile();
 }
 
 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
-    : TargetLowering(TM, createTLOF(TM)) {
+    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
   RegInfo = TM.getRegisterInfo();
   Itins = TM.getInstrItineraryData();
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setExceptionSelectorRegister(ARM::R1);
   }
 
-  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  else
+    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
   // the default expansion.
   if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
 
+  case ARMISD::WIN__CHKSTK:   return "ARMISD::WIN__CHKSTK";
+
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
   case ARMISD::VCGE:          return "ARMISD::VCGE";
@@ -1199,7 +1205,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::C:
     if (!Subtarget->isAAPCS_ABI())
       return CallingConv::ARM_APCS;
-    else if (Subtarget->hasVFP2() &&
+    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
              !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
@@ -1207,10 +1213,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
       return CallingConv::ARM_AAPCS;
   case CallingConv::Fast:
     if (!Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() && !isVarArg)
+      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
         return CallingConv::Fast;
       return CallingConv::ARM_APCS;
-    } else if (Subtarget->hasVFP2() && !isVarArg)
+    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
     else
       return CallingConv::ARM_AAPCS;
@@ -1598,8 +1604,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
   if (EnableARMLongCalls) {
-    assert (getTargetMachine().getRelocationModel() == Reloc::Static
-            && "long-calls with non-static relocation model!");
+    assert((Subtarget->isTargetWindows() ||
+            getTargetMachine().getRelocationModel() == Reloc::Static) &&
+           "long-calls with non-static relocation model!");
     // Handle a global address or an external symbol. If it's not one of
     // those, the target's already in a register, so we don't need to do
     // anything extra.
@@ -1647,6 +1654,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
       Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
                            DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+    } else if (Subtarget->isTargetCOFF()) {
+      assert(Subtarget->isTargetWindows() &&
+             "Windows is the only supported COFF target");
+      unsigned TargetFlags = GV->hasDLLImportStorageClass()
+                                 ? ARMII::MO_DLLIMPORT
+                                 : ARMII::MO_NO_FLAG;
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
+                                          TargetFlags);
+      if (GV->hasDLLImportStorageClass())
+        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                             DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
+                                         Callee), MachinePointerInfo::getGOT(),
+                             false, false, false, 0);
     } else {
       // On ELF targets for PIC code, direct calls should go through the PLT
       unsigned OpFlags = 0;
@@ -1688,7 +1708,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // FIXME: handle tail calls differently.
   unsigned CallOpc;
-  bool HasMinSizeAttr = Subtarget->isMinSize();
+  bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
+      AttributeSet::FunctionIndex, Attribute::MinSize);
   if (Subtarget->isThumb()) {
     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
       CallOpc = ARMISD::CALL_NOLINK;
@@ -2326,7 +2347,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(Chain)
     .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
-               DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+               DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+               0);
 
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
   return CallResult.first;
@@ -2434,7 +2456,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt()) {
+  if (Subtarget->useMovt(DAG.getMachineFunction())) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -2456,7 +2478,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
 
-  if (Subtarget->useMovt())
+  if (Subtarget->useMovt(DAG.getMachineFunction()))
     ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
@@ -2476,18 +2498,27 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+         "Windows on ARM expects to use movw/movt");
 
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  const ARMII::TOF TargetFlags =
+    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
   EVT PtrVT = getPointerTy();
+  SDValue Result;
   SDLoc DL(Op);
 
   ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
   // operands, expand this into two nodes.
-  return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
-                     DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+                                                  TargetFlags));
+  if (GV->hasDLLImportStorageClass())
+    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+                         MachinePointerInfo::getGOT(), false, false, false, 0);
+  return Result;
 }
 
 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
@@ -2535,6 +2566,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   SDLoc dl(Op);
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
+  case Intrinsic::arm_rbit: {
+    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+           "RBIT intrinsic must have i32 type!");
+    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+  }
   case Intrinsic::arm_thread_pointer: {
     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -4492,6 +4528,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
       BitMask <<= 8;
       ImmMask <<= 1;
     }
+
+    if (DAG.getTargetLoweringInfo().isBigEndian())
+      // swap higher and lower 32 bit word
+      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
     // Op=1, Cmode=1110.
     OpCmode = 0x1e;
     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -6078,7 +6119,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
-               &Args, 0)
+               std::move(Args), 0)
     .setDiscardResult();
 
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -6213,6 +6254,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
   case ISD::SDIVREM:
   case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC:
+    if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+      return LowerDYNAMIC_STACKALLOC(Op, DAG);
+    llvm_unreachable("Don't know how to custom lower this!");
   }
 }
 
@@ -7112,6 +7157,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
 }
 
 MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  const TargetMachine &TM = getTargetMachine();
+  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  assert(Subtarget->isTargetWindows() &&
+         "__chkstk is only supported on Windows");
+  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+  // __chkstk takes the number of words to allocate on the stack in R4, and
+  // returns the stack adjustment in number of bytes in R4.  This will not
+  // clobber any other registers (other than the obvious lr).
+  //
+  // Although, technically, IP should be considered a register which may be
+  // clobbered, the call itself will not touch it.  Windows on ARM is a pure
+  // thumb-2 environment, so there is no interworking required.  As a result, we
+  // do not expect a veneer to be emitted by the linker, clobbering IP.
+  //
+  // Each module receives its own copy of __chkstk, so no import thunk is
+  // required, again, ensuring that IP is not clobbered.
+  //
+  // Finally, although some linkers may theoretically provide a trampoline for
+  // out of range calls (which is quite common due to a 32M range limitation of
+  // branches for Thumb), we can generate the long-call version via
+  // -mcmodel=large, alleviating the need for the trampoline which may clobber
+  // IP.
+
+  switch (TM.getCodeModel()) {
+  case CodeModel::Small:
+  case CodeModel::Medium:
+  case CodeModel::Default:
+  case CodeModel::Kernel:
+    BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+      .addImm((unsigned)ARMCC::AL).addReg(0)
+      .addExternalSymbol("__chkstk")
+      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+    break;
+  case CodeModel::Large:
+  case CodeModel::JITDefault: {
+    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+    unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+    BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+      .addExternalSymbol("__chkstk");
+    BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+      .addImm((unsigned)ARMCC::AL).addReg(0)
+      .addReg(Reg, RegState::Kill)
+      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+    break;
+  }
+  }
+  // Subtract the byte adjustment __chkstk returned in R4 from SP.
+  AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+                                      ARM::SP)
+                              .addReg(ARM::SP) // Source: a use, not a def.
+                              .addReg(ARM::R4, RegState::Kill)));
+
+  MI->eraseFromParent();
+  return MBB;
+}
+
+MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -7360,6 +7472,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case ARM::COPY_STRUCT_BYVAL_I32:
     ++NumLoopByVals;
     return EmitStructByval(MI, BB);
+  case ARM::WIN__CHKSTK:
+    return EmitLowered__chkstk(MI, BB);
   }
 }
 
@@ -8315,6 +8429,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
                                  std::min(4U, LD->getAlignment() / 2));
 
     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+    if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+      std::swap (NewLD1, NewLD2);
     SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
     DCI.RemoveFromWorklist(LD);
     DAG.DeleteNode(LD);
@@ -8382,7 +8498,8 @@ static SDValue PerformSTORECombine(SDNode *N,
     SDLoc DL(St);
     SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
     SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
-    for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+    for (unsigned i = 0; i < NumElems; ++i)
+      ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
 
     // Can't shuffle using an illegal type.
     if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
@@ -10471,13 +10588,39 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(InChain)
-    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
     .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
 
   std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
   return CallInfo.first;
 }
 
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetWindows() && "unsupported target platform");
+  SDLoc DL(Op);
+  // Lower a dynamic alloca on Windows: hand the size to WIN__CHKSTK in R4,
+  // then read the updated SP back. Operand 0 is the chain, 1 the size.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size  = Op.getOperand(1);
+  // Scale the size down by four (presumably a byte count -> 4-byte words).
+  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+                              DAG.getConstant(2, MVT::i32));
+  // Copy the word count into R4; the glue result ties it to the probe.
+  SDValue Flag;
+  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+  Flag = Chain.getValue(1);
+  // WIN__CHKSTK yields only chain + glue, so result 0 must be MVT::Other.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+  // Read SP after the probe; the copy's second result is the new chain.
+  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+  Chain = NewSP.getValue(1);
+  // Results: the new stack pointer and the output chain.
+  SDValue Ops[2] = { NewSP, Chain };
+  return DAG.getMergeValues(Ops, DL);
+}
+
 bool
 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The ARM target isn't yet aware of offsets.
@@ -10635,14 +10778,20 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
 bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
   // Loads and stores less than 64-bits are already atomic; ones above that
   // are doomed anyway, so defer to the default libcall and blame the OS when
-  // things go wrong:
-  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
-  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-    return LI->getType()->getPrimitiveSizeInBits() == 64;
-
-  // For the real atomic operations, we have ldrex/strex up to 64 bits.
-  return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+  // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+  // anything for those.
+  bool IsMClass = Subtarget->isMClass();
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+    return Size == 64 && !IsMClass;
+  } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+    return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+  }
+
+  // For the real atomic operations, we have ldrex/strex up to 32 bits,
+  // and up to 64 bits on the non-M profiles.
+  unsigned AtomicLimit = IsMClass ? 32 : 64;
+  return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
 }
 
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index c15305c..1ace0f3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -95,6 +95,8 @@ namespace llvm {
 
       PRELOAD,      // Preload
 
+      WIN__CHKSTK,  // Windows' __chkstk call to do stack probing.
+
       VCEQ,         // Vector compare equal.
       VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
                               const ARMSubtarget *ST) const;
     SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
 
     unsigned getRegisterByName(const char* RegName, EVT VT) const override;
 
@@ -578,6 +581,9 @@ namespace llvm {
 
     MachineBasicBlock *EmitStructByval(MachineInstr *MI,
                                        MachineBasicBlock *MBB) const;
+
+    MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const;
   };
 
   enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 718d5da..2bb8976 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -270,8 +270,8 @@ def UseNaClTrap      : Predicate<"Subtarget->useNaClTrap()">,
 def DontUseNaClTrap  : Predicate<"!Subtarget->useNaClTrap()">;
 
 // FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt          : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
+def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
+def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
 def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
 def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
 
@@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand<i32> {
 // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
 def rot_imm_XFORM: SDNodeXForm<imm, [{
   switch (N->getZExtValue()){
-  default: assert(0);
+  default: llvm_unreachable(nullptr);
   case 0:  return CurDAG->getTargetConstant(0, MVT::i32);
   case 8:  return CurDAG->getTargetConstant(1, MVT::i32);
   case 16: return CurDAG->getTargetConstant(2, MVT::i32);
@@ -594,7 +594,7 @@ def so_imm2part : PatLeaf<(imm), [{
 /// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
 ///
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt())
+  if (Subtarget->useMovt(*MF))
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]>;
@@ -3334,8 +3334,8 @@ def SBFX  : I<(outs GPRnopc:$Rd),
   let Inst{3-0}   = Rn;
 }
 
-def UBFX  : I<(outs GPR:$Rd),
-              (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
+def UBFX  : I<(outs GPRnopc:$Rd),
+              (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
                AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
                Requires<[IsARM, HasV6T2]> {
@@ -4443,7 +4443,7 @@ def instsyncb_opt : Operand<i32> {
   let DecoderMethod = "DecodeInstSyncBarrierOption";
 }
 
-// memory barriers protect the atomic sequences
+// Memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
 def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
                 "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
@@ -4452,7 +4452,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
   let Inst{31-4} = 0xf57ff05;
   let Inst{3-0} = opt;
 }
-}
 
 def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
                 "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
@@ -4464,12 +4463,13 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
 
 // ISB has only full system option
 def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
-                "isb", "\t$opt", []>,
+                "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
                 Requires<[IsARM, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf57ff06;
   let Inst{3-0} = opt;
 }
+}
 
 let usesCustomInserter = 1, Defs = [CPSR] in {
 
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask,  so_imm:$a), NoItinerary,
   let Inst{11-0} = a;
 }
 
+// Dynamic stack allocation yields a __chkstk call for Windows targets.  These
+// calls are needed to probe the stack when allocating more than 4K bytes in
+// one go.  Touching the stack at 4K increments is necessary to ensure that the
+// guard pages used by the OS virtual memory manager are allocated in the
+// correct sequence.
+// The main point of having a separate instruction is to capture the extra
+// unmodelled effects (compared to ordinary calls), such as the SP change.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+                      [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+  def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
 //===----------------------------------------------------------------------===//
 // TLS Instructions
 //
@@ -5100,9 +5113,11 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask,  so_imm:$a), NoItinerary,
 // __aeabi_read_tp preserves the registers r1-r3.
 // This is a pseudo inst so that we can get the encoding right,
 // complete with fixup for the aeabi_read_tp function.
+// TPsoft is valid for ARM mode only; for Thumb mode, a tTPsoft pattern is
+// defined in "ARMInstrThumb.td".
 let isCall = 1,
   Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
-  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+  def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br,
                [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
 }
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b32b5d2..c02bb3b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -6372,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
          dsub_0)>;
 }
 
+// The following multiclass is basically a copy of the Lengthen_HalfSingle
+// definition above, but with an additional parameter "RevLanes" to select the
+// correct VREV32dXX instruction. This is to convert data loaded by VLD1LN into
+// the proper vector format in big-endian mode.
+multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+                               string InsnLanes, string InsnTy, string RevLanes> {
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+         (!cast<Instruction>("VREV32d" # RevLanes)
+           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+         dsub_0)>;
+  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+         (!cast<Instruction>("VREV32d" # RevLanes)
+           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+         dsub_0)>;
+  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+         (!cast<Instruction>("VREV32d" # RevLanes)
+           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+         dsub_0)>;
+}
+
 // extload, zextload and sextload for a lengthening load followed by another
 // lengthening load, to quadruple the initial length.
 //
@@ -6406,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
              dsub_0))>;
 }
 
+// The following multiclass is basically a copy of the Lengthen_Double
+// definition above, but with an additional parameter "RevLanes" to select the
+// correct VREV32dXX instruction. This is to convert data loaded by VLD1LN into
+// the proper vector format in big-endian mode.
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+                           string Insn2Ty, string RevLanes> {
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV32d" # RevLanes)
+             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0))>;
+  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV32d" # RevLanes)
+             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0))>;
+  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV32d" # RevLanes)
+             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0))>;
+}
+
 // extload, zextload and sextload for a lengthening load followed by another
 // lengthening load, to quadruple the initial length, but which ends up only
 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
@@ -6443,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
           dsub_0)>;
 }
 
+// The following multiclass is basically a copy of the Lengthen_HalfDouble
+// definition above, but with an additional VREV16d8 instruction to convert
+// data loaded by VLD1LN into the proper vector format in big-endian
+// mode.
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+                           string Insn2Ty> {
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0)),
+          dsub_0)>;
+  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0)),
+          dsub_0)>;
+  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+            (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+             dsub_0)),
+          dsub_0)>;
+}
+
 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
 
-defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+let Predicates = [IsLE] in {
+  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
 
-// Double lengthening - v4i8 -> v4i16 -> v4i32
-defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i8 -> v2i16 -> v2i32
-defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i16 -> v2i32 -> v2i64
-defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+  // Double lengthening - v4i8 -> v4i16 -> v4i32
+  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+  // v2i8 -> v2i16 -> v2i32
+  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+  // v2i16 -> v2i32 -> v2i64
+  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+}
+
+let Predicates = [IsBE] in {
+  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
+  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
+
+  // Double lengthening - v4i8 -> v4i16 -> v4i32
+  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
+  // v2i8 -> v2i16 -> v2i32
+  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+  // v2i16 -> v2i32 -> v2i64
+  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
 
 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
-      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
-         (VLD1LNd16 addrmode6:$addr,
-                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
-      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
-         (VLD1LNd16 addrmode6:$addr,
-                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
-      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
-         (VLD1LNd16 addrmode6:$addr,
-                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+let Predicates = [IsLE] in {
+  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+           (VLD1LNd16 addrmode6:$addr,
+                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+           (VLD1LNd16 addrmode6:$addr,
+                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+           (VLD1LNd16 addrmode6:$addr,
+                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// however with an additional VREV16d8 instruction to convert data
+// loaded by VLD1LN into proper vector format in big endian mode.
+let Predicates = [IsBE] in {
+  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+           (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr,
+                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+           (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr,
+                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+           (!cast<Instruction>("VREV16d8")
+             (VLD1LNd16 addrmode6:$addr,
+                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
 
 //===----------------------------------------------------------------------===//
 // Assembler aliases
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c30d6ab..85e9351 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3209,27 +3209,28 @@ def t2MOVCCi32imm
 let hasSideEffects = 1 in {
 def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
                 "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
-                Requires<[HasDB]> {
+                Requires<[IsThumb, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf3bf8f5;
   let Inst{3-0} = opt;
 }
-}
 
 def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
                 "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
-                Requires<[HasDB]> {
+                Requires<[IsThumb, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf3bf8f4;
   let Inst{3-0} = opt;
 }
 
 def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
-                "isb", "\t$opt", []>, Requires<[HasDB]> {
+                "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
+                Requires<[IsThumb, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf3bf8f6;
   let Inst{3-0} = opt;
 }
+}
 
 class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
                 InstrItinClass itin, string opc, string asm, string cstr,
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 8821c2d..6d1114d 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,6 +13,7 @@
 
 #include "ARMJITInfo.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
 #include "ARMRelocations.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
@@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
     }
   }
 }
+
+void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) {
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+  JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+  IsPIC = isPIC;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ee4c863..27e2a20 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -14,7 +14,6 @@
 #ifndef ARMJITINFO_H
 #define ARMJITINFO_H
 
-#include "ARMMachineFunctionInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -103,12 +102,7 @@ namespace llvm {
     /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
     /// jump table ids to jump table bases map; remember if codegen relocation
     /// model is PIC.
-    void Initialize(const MachineFunction &MF, bool isPIC) {
-      const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-      ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
-      JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
-      IsPIC = isPIC;
-    }
+    void Initialize(const MachineFunction &MF, bool isPIC);
 
     /// getConstantPoolEntryAddr - The ARM target puts all constant
     /// pool entries into constant islands. This returns the address of the
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index ee7df54..a03bcdb 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -505,7 +505,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
 
   // Exception: If the base register is in the input reglist, Thumb1 LDM is
   // non-writeback. Check for this.
-  if (Opcode == ARM::tLDRi && isThumb1)
+  if (Opcode == ARM::tLDMIA && isThumb1)
     for (unsigned I = 0; I < NumRegs; ++I)
       if (Base == Regs[I].first) {
         Writeback = false;
@@ -519,17 +519,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
       // Update tLDMIA with writeback if necessary.
       Opcode = ARM::tLDMIA_UPD;
 
-    // The base isn't dead after a merged instruction with writeback. Update
-    // future uses of the base with the added offset (if possible), or reset
-    // the base register as necessary.
-    if (!BaseKill)
-      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
-
     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
 
     // Thumb1: we might need to set base writeback when building the MI.
     MIB.addReg(Base, getDefRegState(true))
        .addReg(Base, getKillRegState(BaseKill));
+
+    // The base isn't dead after a merged instruction with writeback. Update
+    // future uses of the base with the added offset (if possible), or reset
+    // the base register as necessary.
+    if (!BaseKill)
+      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
   } else {
     // No writeback, simply build the MachineInstr.
     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
@@ -1734,6 +1734,12 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   isThumb2 = AFI->isThumb2Function();
   isThumb1 = AFI->isThumbFunction() && !isThumb2;
 
+  // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
+  if (isThumb1) {
+    delete RS;
+    return false;
+  }
+
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
        ++MFI) {
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 48141b1..023f5f8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -34,7 +34,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
                                    OutContext);
     switch (Option) {
     default: llvm_unreachable("Unknown target flag on symbol operand");
-    case 0:
+    case ARMII::MO_NO_FLAG:
       break;
     case ARMII::MO_LO16:
       Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index af445e2..892b269 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -12,3 +12,13 @@
 using namespace llvm;
 
 void ARMFunctionInfo::anchor() { }
+
+ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
+    : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+      hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+      StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
+      RestoreSPFromFP(false), LRSpilledForFarJump(false),
+      FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+      GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0),
+      PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+      GlobalBaseReg(0) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d7ec6eb..44a9e34 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -130,16 +130,7 @@ public:
     JumpTableUId(0), PICLabelUId(0),
     VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
 
-  explicit ARMFunctionInfo(MachineFunction &MF) :
-    isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
-    hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
-    StByValParamsPadding(0),
-    ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
-    LRSpilledForFarJump(false),
-    FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
-    GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
-    JumpTableUId(0), PICLabelUId(0),
-    VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+  explicit ARMFunctionInfo(MachineFunction &MF);
 
   bool isThumbFunction() const { return isThumb; }
   bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -220,7 +211,7 @@ public:
 
   void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
     if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
-      assert(0 && "Duplicate entries!");
+      llvm_unreachable("Duplicate entries!");
   }
 
   unsigned getOriginalCPIdx(unsigned CloneIdx) const {
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 008ad64..3dcc0df 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "arm-selectiondag-info"
 
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
-  : TargetSelectionDAGInfo(TM),
-    Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
-}
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
+    : TargetSelectionDAGInfo(&DL) {}
 
 ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
 }
@@ -34,6 +32,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                                              bool isVolatile, bool AlwaysInline,
                                              MachinePointerInfo DstPtrInfo,
                                           MachinePointerInfo SrcPtrInfo) const {
+  const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
   // Do repeated 4-byte loads and stores. To be improved.
   // This requires 4-byte alignment.
   if ((Align & 3) != 0)
@@ -44,7 +43,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
   if (!ConstantSize)
     return SDValue();
   uint64_t SizeVal = ConstantSize->getZExtValue();
-  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
     return SDValue();
 
   unsigned BytesLeft = SizeVal & 3;
@@ -54,7 +53,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
   unsigned VTSize = 4;
   unsigned i = 0;
   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
-  const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+  const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
   SDValue TFOps[6];
   SDValue Loads[6];
   uint64_t SrcOff = 0, DstOff = 0;
@@ -151,9 +150,10 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
                         SDValue Src, SDValue Size,
                         unsigned Align, bool isVolatile,
                         MachinePointerInfo DstPtrInfo) const {
+  const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
   // Use default for non-AAPCS (or MachO) subtargets
-  if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() ||
-      Subtarget->isTargetWindows())
+  if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
+      Subtarget.isTargetWindows())
     return SDValue();
 
   const ARMTargetLowering &TLI =
@@ -191,7 +191,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
     .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
                Type::getVoidTy(*DAG.getContext()),
                DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
-                                     TLI.getPointerTy()), &Args, 0)
+                                     TLI.getPointerTy()), std::move(Args), 0)
     .setDiscardResult();
 
   std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 8c2397b..13769dc 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -36,12 +36,8 @@ namespace ARM_AM {
 }  // end namespace ARM_AM
 
 class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
-  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
-  /// make the right decision when generating code for different targets.
-  const ARMSubtarget *Subtarget;
-
 public:
-  explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+  explicit ARMSelectionDAGInfo(const DataLayout &DL);
   ~ARMSelectionDAGInfo();
 
   SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5b204f6..0eb24ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,8 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMSubtarget.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -76,22 +83,89 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
                          "Allow IT blocks based on ARMv7"),
               clEnumValEnd));
 
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
-                           const std::string &FS, bool IsLittle,
-                           const TargetOptions &Options)
-  : ARMGenSubtargetInfo(TT, CPU, FS)
-  , ARMProcFamily(Others)
-  , ARMProcClass(None)
-  , stackAlignment(4)
-  , CPUString(CPU)
-  , IsLittle(IsLittle)
-  , TargetTriple(TT)
-  , Options(Options)
-  , TargetABI(ARM_ABI_UNKNOWN) {
+static std::string computeDataLayout(ARMSubtarget &ST) {
+  std::string Ret = "";
+
+  if (ST.isLittle())
+    // Little endian.
+    Ret += "e";
+  else
+    // Big endian.
+    Ret += "E";
+
+  Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+
+  // Pointers are 32 bits and aligned to 32 bits.
+  Ret += "-p:32:32";
+
+  // On thumb, i16, i8 and i1 have natural alignment requirements, but we try
+  // to align to 32.
+  if (ST.isThumb())
+    Ret += "-i1:8:32-i8:8:32-i16:16:32";
+
+  // ABIs other than APCS have 64 bit integers with natural alignment.
+  if (!ST.isAPCS_ABI())
+    Ret += "-i64:64";
+
+  // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+  // bits, others to 64 bits. We always try to align to 64 bits.
+  if (ST.isAPCS_ABI())
+    Ret += "-f64:32:64";
+
+  // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
+  // to 64. We always try to give them natural alignment.
+  if (ST.isAPCS_ABI())
+    Ret += "-v64:32:64-v128:32:128";
+  else
+    Ret += "-v128:64:128";
+
+  // On thumb and APCS, only try to align aggregates to 32 bits (the default is
+  // 64 bits).
+  if (ST.isThumb() || ST.isAPCS_ABI())
+    Ret += "-a:0:32";
+
+  // Integer registers are 32 bits.
+  Ret += "-n32";
+
+  // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
+  // aligned everywhere else.
+  if (ST.isTargetNaCl())
+    Ret += "-S128";
+  else if (ST.isAAPCS_ABI())
+    Ret += "-S64";
+  else
+    Ret += "-S32";
+
+  return Ret;
+}
+
+/// initializeSubtargetDependencies - Initializes using a CPU and feature string
+/// so that we can use initializer lists for subtarget initialization.
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
+                                                            StringRef FS) {
   initializeEnvironment();
   resetSubtargetFeatures(CPU, FS);
+  return *this;
 }
 
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS, TargetMachine &TM,
+                           bool IsLittle, const TargetOptions &Options)
+    : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+      ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
+      TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+      DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+      TSInfo(DL), JITInfo(),
+      InstrInfo(isThumb1Only()
+                    ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+                    : !isThumb()
+                          ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+      TLInfo(TM),
+      FrameLowering(!isThumb1Only()
+                        ? new ARMFrameLowering(*this)
+                        : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
+
 void ARMSubtarget::initializeEnvironment() {
   HasV4TOps = false;
   HasV5TOps = false;
@@ -106,7 +180,6 @@ void ARMSubtarget::initializeEnvironment() {
   HasVFPv4 = false;
   HasFPARMv8 = false;
   HasNEON = false;
-  MinSize = false;
   UseNEONForSinglePrecisionFP = false;
   UseMulOps = UseFusedMulOps;
   SlowFPVMLx = false;
@@ -158,9 +231,6 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
     initializeEnvironment();
     resetSubtargetFeatures(CPU, FS);
   }
-
-  MinSize =
-      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
 }
 
 void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -353,6 +423,17 @@ bool ARMSubtarget::hasSinCos() const {
     !getTargetTriple().isOSVersionLT(7, 0);
 }
 
+// Enable the PostMachineScheduler if the target selects it instead of
+// PostRAScheduler. Currently only available on the command line via
+// -misched-postra.
+bool ARMSubtarget::enablePostMachineScheduler() const {
+  return PostRAScheduler;
+}
+
+bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+  return hasAnyDataBarrier() && !isThumb1Only();
+}
+
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtargetInfo::AntiDepBreakMode& Mode,
@@ -360,3 +441,12 @@ bool ARMSubtarget::enablePostRAScheduler(
   Mode = TargetSubtargetInfo::ANTIDEP_NONE;
   return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
 }
+
+bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+  // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
+  // immediates as it is inherently position independent, and may be out of
+  // range otherwise.
+  return UseMovt && (isTargetWindows() ||
+                     !MF.getFunction()->getAttributes().hasAttribute(
+                         AttributeSet::FunctionIndex, Attribute::MinSize));
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 38536b2..8f6c165 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,8 +14,20 @@
 #ifndef ARMSUBTARGET_H
 #define ARMSUBTARGET_H
 
+
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "ARMJITInfo.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
@@ -64,10 +76,6 @@ protected:
   bool HasFPARMv8;
   bool HasNEON;
 
-  /// MinSize - True if the function being compiled has the "minsize" attribute
-  /// and should be optimised for size at the expense of speed.
-  bool MinSize;
-
   /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
   /// specified. Use the method useNEONForSinglePrecisionFP() to
   /// determine if NEON should actually be used.
@@ -236,7 +244,7 @@ protected:
   /// of the specified triple.
   ///
   ARMSubtarget(const std::string &TT, const std::string &CPU,
-               const std::string &FS, bool IsLittle,
+               const std::string &FS, TargetMachine &TM, bool IsLittle,
                const TargetOptions &Options);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
@@ -250,7 +258,31 @@ protected:
 
   /// \brief Reset the features for the ARM target.
   void resetSubtargetFeatures(const MachineFunction *MF) override;
+
+  /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+  /// so that we can use initializer lists for subtarget initialization.
+  ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+  const DataLayout *getDataLayout() const { return &DL; }
+  const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+  ARMJITInfo *getJITInfo() { return &JITInfo; }
+  const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
+  const ARMTargetLowering *getTargetLowering() const { return &TLInfo; }
+  const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); }
+  const ARMBaseRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo->getRegisterInfo();
+  }
+
 private:
+  const DataLayout DL;
+  ARMSelectionDAGInfo TSInfo;
+  ARMJITInfo JITInfo;
+  // One of ARMInstrInfo, Thumb1InstrInfo or Thumb2InstrInfo.
+  std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
+  ARMTargetLowering   TLInfo;
+  // Either Thumb1FrameLowering or ARMFrameLowering.
+  std::unique_ptr<ARMFrameLowering> FrameLowering;
+
   void initializeEnvironment();
   void resetSubtargetFeatures(StringRef CPU, StringRef FS);
 public:
@@ -286,7 +318,6 @@ public:
   bool hasCrypto() const { return HasCrypto; }
   bool hasCRC() const { return HasCRC; }
   bool hasVirtualization() const { return HasVirtualization; }
-  bool isMinSize() const { return MinSize; }
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
 
@@ -382,7 +413,8 @@ public:
 
   bool isR9Reserved() const { return IsR9Reserved; }
 
-  bool useMovt() const { return UseMovt && !isMinSize(); }
+  bool useMovt(const MachineFunction &MF) const;
+
   bool supportsTailCall() const { return SupportsTailCall; }
 
   bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
@@ -399,11 +431,17 @@ public:
   /// compiler runtime or math libraries.
   bool hasSinCos() const;
 
+  /// True if PostRAScheduler is set; the optimization level is not checked.
+  bool enablePostMachineScheduler() const;
+
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtargetInfo::AntiDepBreakMode& Mode,
                              RegClassVector& CriticalPathRCs) const override;
 
+  /// enableAtomicExpandLoadLinked - True if we need to expand our atomics.
+  bool enableAtomicExpandLoadLinked() const override;
+
   /// getInstrItins - Return the instruction itineraies based on subtarget
   /// selection.
   const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 8876227..d85194b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
                    cl::desc("Inhibit optimization of S->D register accesses on A15"),
                    cl::init(false));
 
+static cl::opt<bool>
+EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
+                 cl::desc("Run SimplifyCFG after expanding atomic operations"
+                          " to make use of cmpxchg flow-based information"),
+                 cl::init(true));
+
 extern "C" void LLVMInitializeARMTarget() {
   // Register the target.
   RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -43,12 +49,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
                                            Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL,
-                                           bool isLittle)
-  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS, isLittle, Options),
-    JITInfo(),
-    InstrItins(Subtarget.getInstrItineraryData()) {
+                                           CodeGenOpt::Level OL, bool isLittle)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS, *this, isLittle, Options) {
 
   // Default to triple-appropriate float ABI
   if (Options.FloatABIType == FloatABI::Default)
@@ -67,74 +70,11 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
 
 void ARMTargetMachine::anchor() { }
 
-static std::string computeDataLayout(ARMSubtarget &ST) {
-  std::string Ret = "";
-
-  if (ST.isLittle())
-    // Little endian.
-    Ret += "e";
-  else
-    // Big endian.
-    Ret += "E";
-
-  Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
-
-  // Pointers are 32 bits and aligned to 32 bits.
-  Ret += "-p:32:32";
-
-  // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to
-  // align to 32.
-  if (ST.isThumb())
-    Ret += "-i1:8:32-i8:8:32-i16:16:32";
-
-  // ABIs other than APCS have 64 bit integers with natural alignment.
-  if (!ST.isAPCS_ABI())
-    Ret += "-i64:64";
-
-  // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
-  // bits, others to 64 bits. We always try to align to 64 bits.
-  if (ST.isAPCS_ABI())
-    Ret += "-f64:32:64";
-
-  // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
-  // to 64. We always ty to give them natural alignment.
-  if (ST.isAPCS_ABI())
-    Ret += "-v64:32:64-v128:32:128";
-  else
-    Ret += "-v128:64:128";
-
-  // On thumb and APCS, only try to align aggregates to 32 bits (the default is
-  // 64 bits).
-  if (ST.isThumb() || ST.isAPCS_ABI())
-    Ret += "-a:0:32";
-
-  // Integer registers are 32 bits.
-  Ret += "-n32";
-
-  // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
-  // aligned everywhere else.
-  if (ST.isTargetNaCl())
-    Ret += "-S128";
-  else if (ST.isAAPCS_ABI())
-    Ret += "-S64";
-  else
-    Ret += "-S32";
-
-  return Ret;
-}
-
-ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
-                                   StringRef CPU, StringRef FS,
-                                   const TargetOptions &Options,
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                                   StringRef FS, const TargetOptions &Options,
                                    Reloc::Model RM, CodeModel::Model CM,
-                                   CodeGenOpt::Level OL,
-                                   bool isLittle)
-  : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
-    InstrInfo(Subtarget),
-    DL(computeDataLayout(Subtarget)),
-    TLInfo(*this),
-    TSInfo(*this),
-    FrameLowering(Subtarget) {
+                                   CodeGenOpt::Level OL, bool isLittle)
+    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
   initAsmInfo();
   if (!Subtarget.hasARMOps())
     report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
@@ -143,21 +83,21 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
 
 void ARMLETargetMachine::anchor() { }
 
-ARMLETargetMachine::
-ARMLETargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+    : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
 void ARMBETargetMachine::anchor() { }
 
-ARMBETargetMachine::
-ARMBETargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+    : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
 void ThumbTargetMachine::anchor() { }
 
@@ -165,38 +105,29 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
                                        StringRef CPU, StringRef FS,
                                        const TargetOptions &Options,
                                        Reloc::Model RM, CodeModel::Model CM,
-                                       CodeGenOpt::Level OL,
-                                       bool isLittle)
-  : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
-    InstrInfo(Subtarget.hasThumb2()
-              ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
-              : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
-    DL(computeDataLayout(Subtarget)),
-    TLInfo(*this),
-    TSInfo(*this),
-    FrameLowering(Subtarget.hasThumb2()
-              ? new ARMFrameLowering(Subtarget)
-              : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+                                       CodeGenOpt::Level OL, bool isLittle)
+    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
+                           isLittle) {
   initAsmInfo();
 }
 
 void ThumbLETargetMachine::anchor() { }
 
-ThumbLETargetMachine::
-ThumbLETargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+    : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
 void ThumbBETargetMachine::anchor() { }
 
-ThumbBETargetMachine::
-ThumbBETargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+    : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
 namespace {
 /// ARM Code Generator Pass Configuration Options.
@@ -213,6 +144,7 @@ public:
     return *getARMTargetMachine().getSubtargetImpl();
   }
 
+  void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addPreRegAlloc() override;
@@ -225,11 +157,21 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new ARMPassConfig(this, PM);
 }
 
-bool ARMPassConfig::addPreISel() {
+void ARMPassConfig::addIRPasses() {
+  addPass(createAtomicExpandLoadLinkedPass(TM));
+
+  // Cmpxchg instructions are often used with a subsequent comparison to
+  // determine whether it succeeded. We can exploit existing control-flow in
+  // ldrex/strex loops to simplify this, but it needs tidying up.
   const ARMSubtarget *Subtarget = &getARMSubtarget();
   if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
-    addPass(createAtomicExpandLoadLinkedPass(TM));
+    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+      addPass(createCFGSimplificationPass());
 
+  TargetPassConfig::addIRPasses();
+}
+
+bool ARMPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createGlobalMergePass(TM));
 
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 664c992..b72b1df 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,17 +14,9 @@
 #ifndef ARMTARGETMACHINE_H
 #define ARMTARGETMACHINE_H
 
-#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
 #include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
-#include "ARMSelectionDAGInfo.h"
 #include "ARMSubtarget.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -32,10 +24,6 @@ namespace llvm {
 class ARMBaseTargetMachine : public LLVMTargetMachine {
 protected:
   ARMSubtarget        Subtarget;
-private:
-  ARMJITInfo          JITInfo;
-  InstrItineraryData  InstrItins;
-
 public:
   ARMBaseTargetMachine(const Target &T, StringRef TT,
                        StringRef CPU, StringRef FS,
@@ -44,15 +32,29 @@ public:
                        CodeGenOpt::Level OL,
                        bool isLittle);
 
-  ARMJITInfo *getJITInfo() override { return &JITInfo; }
   const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+  const ARMBaseRegisterInfo *getRegisterInfo() const override {
+    return getSubtargetImpl()->getRegisterInfo();
+  }
   const ARMTargetLowering *getTargetLowering() const override {
-    // Implemented by derived classes
-    llvm_unreachable("getTargetLowering not implemented");
+    return getSubtargetImpl()->getTargetLowering();
+  }
+  const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+    return getSubtargetImpl()->getSelectionDAGInfo();
+  }
+  const ARMBaseInstrInfo *getInstrInfo() const override {
+    return getSubtargetImpl()->getInstrInfo();
+  }
+  const ARMFrameLowering *getFrameLowering() const override {
+    return getSubtargetImpl()->getFrameLowering();
   }
   const InstrItineraryData *getInstrItineraryData() const override {
-    return &InstrItins;
+    return &getSubtargetImpl()->getInstrItineraryData();
   }
+  const DataLayout *getDataLayout() const override {
+    return getSubtargetImpl()->getDataLayout();
+  }
+  ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
 
   /// \brief Register ARM analysis passes with a pass manager.
   void addAnalysisPasses(PassManagerBase &PM) override;
@@ -67,35 +69,10 @@ public:
 ///
 class ARMTargetMachine : public ARMBaseTargetMachine {
   virtual void anchor();
-  ARMInstrInfo        InstrInfo;
-  const DataLayout    DL;       // Calculates type size & alignment
-  ARMTargetLowering   TLInfo;
-  ARMSelectionDAGInfo TSInfo;
-  ARMFrameLowering    FrameLowering;
  public:
-  ARMTargetMachine(const Target &T, StringRef TT,
-                   StringRef CPU, StringRef FS,
-                   const TargetOptions &Options,
-                   Reloc::Model RM, CodeModel::Model CM,
-                   CodeGenOpt::Level OL,
-                   bool isLittle);
-
-  const ARMRegisterInfo *getRegisterInfo() const override {
-    return &InstrInfo.getRegisterInfo();
-  }
-
-  const ARMTargetLowering *getTargetLowering() const override {
-    return &TLInfo;
-  }
-
-  const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
-    return &TSInfo;
-  }
-  const ARMFrameLowering *getFrameLowering() const override {
-    return &FrameLowering;
-  }
-  const ARMInstrInfo *getInstrInfo() const override { return &InstrInfo; }
-  const DataLayout *getDataLayout() const override { return &DL; }
+   ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                    const TargetOptions &Options, Reloc::Model RM,
+                    CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
 };
 
 /// ARMLETargetMachine - ARM little endian target machine.
@@ -114,10 +91,9 @@ public:
 class ARMBETargetMachine : public ARMTargetMachine {
   void anchor() override;
 public:
-  ARMBETargetMachine(const Target &T, StringRef TT,
-                     StringRef CPU, StringRef FS, const TargetOptions &Options,
-                     Reloc::Model RM, CodeModel::Model CM,
-                     CodeGenOpt::Level OL);
+  ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options, Reloc::Model RM,
+                     CodeModel::Model CM, CodeGenOpt::Level OL);
 };
 
 /// ThumbTargetMachine - Thumb target machine.
@@ -126,43 +102,10 @@ public:
 ///
 class ThumbTargetMachine : public ARMBaseTargetMachine {
   virtual void anchor();
-  // Either Thumb1InstrInfo or Thumb2InstrInfo.
-  std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
-  const DataLayout    DL;   // Calculates type size & alignment
-  ARMTargetLowering   TLInfo;
-  ARMSelectionDAGInfo TSInfo;
-  // Either Thumb1FrameLowering or ARMFrameLowering.
-  std::unique_ptr<ARMFrameLowering> FrameLowering;
 public:
-  ThumbTargetMachine(const Target &T, StringRef TT,
-                     StringRef CPU, StringRef FS,
-                     const TargetOptions &Options,
-                     Reloc::Model RM, CodeModel::Model CM,
-                     CodeGenOpt::Level OL,
-                     bool isLittle);
-
-  /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
-  const ARMBaseRegisterInfo *getRegisterInfo() const override {
-    return &InstrInfo->getRegisterInfo();
-  }
-
-  const ARMTargetLowering *getTargetLowering() const override {
-    return &TLInfo;
-  }
-
-  const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
-    return &TSInfo;
-  }
-
-  /// returns either Thumb1InstrInfo or Thumb2InstrInfo
-  const ARMBaseInstrInfo *getInstrInfo() const override {
-    return InstrInfo.get();
-  }
-  /// returns either Thumb1FrameLowering or ARMFrameLowering
-  const ARMFrameLowering *getFrameLowering() const override {
-    return FrameLowering.get();
-  }
-  const DataLayout *getDataLayout() const override { return &DL; }
+  ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options, Reloc::Model RM,
+                     CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
 };
 
 /// ThumbLETargetMachine - Thumb little endian target machine.
@@ -170,10 +113,10 @@ public:
 class ThumbLETargetMachine : public ThumbTargetMachine {
   void anchor() override;
 public:
-  ThumbLETargetMachine(const Target &T, StringRef TT,
-                     StringRef CPU, StringRef FS, const TargetOptions &Options,
-                     Reloc::Model RM, CodeModel::Model CM,
-                     CodeGenOpt::Level OL);
+  ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
 };
 
 /// ThumbBETargetMachine - Thumb big endian target machine.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 57df7da..a2ace62 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
 
 unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                 Type *SubTp) const {
-  // We only handle costs of reverse shuffles for now.
-  if (Kind != SK_Reverse)
+  // We only handle costs of reverse and alternate shuffles for now.
+  if (Kind != SK_Reverse && Kind != SK_Alternate)
     return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 
-  static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
-    // Reverse shuffle cost one instruction if we are shuffling within a double
-    // word (vrev) or two if we shuffle a quad word (vrev, vext).
-    { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
-
-    { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
-  };
+  if (Kind == SK_Reverse) {
+    static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+        // Reverse shuffle cost one instruction if we are shuffling within a
+        // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
 
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
 
-  int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
-  if (Idx == -1)
-    return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+    int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+    if (Idx == -1)
+      return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 
-  return LT.first * NEONShuffleTbl[Idx].Cost;
+    return LT.first * NEONShuffleTbl[Idx].Cost;
+  }
+  if (Kind == SK_Alternate) {
+    static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+        // Alt shuffle cost table for ARM. Cost is the number of instructions
+        // required to create the shuffled vector.
+
+        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+    int Idx =
+        CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+    if (Idx == -1)
+      return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+    return LT.first * NEONAltShuffleTbl[Idx].Cost;
+  }
+  return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 }
 
 unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 5cdf394..b62706c 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -190,11 +190,11 @@ class ARMAsmParser : public MCTargetAsmParser {
   }
 
   int tryParseRegister();
-  bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
-  int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
-  bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
-  bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
-  bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+  bool tryParseRegisterWithWriteBack(OperandVector &);
+  int tryParseShiftRegister(OperandVector &);
+  bool parseRegisterList(OperandVector &);
+  bool parseMemory(OperandVector &);
+  bool parseOperand(OperandVector &, StringRef Mnemonic);
   bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
   bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
                               unsigned &ShiftAmount);
@@ -282,54 +282,42 @@ class ARMAsmParser : public MCTargetAsmParser {
 
   /// }
 
-  OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseCoprocNumOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseCoprocRegOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseCoprocOptionOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseMemBarrierOptOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseInstSyncBarrierOptOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseProcIFlagsOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseMSRMaskOperand(
-    SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
-                                   StringRef Op, int Low, int High);
-  OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+  OperandMatchResultTy parseITCondCode(OperandVector &);
+  OperandMatchResultTy parseCoprocNumOperand(OperandVector &);
+  OperandMatchResultTy parseCoprocRegOperand(OperandVector &);
+  OperandMatchResultTy parseCoprocOptionOperand(OperandVector &);
+  OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &);
+  OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
+  OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
+  OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+  OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
+                                   int High);
+  OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
     return parsePKHImm(O, "lsl", 0, 31);
   }
-  OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+  OperandMatchResultTy parsePKHASRImm(OperandVector &O) {
     return parsePKHImm(O, "asr", 1, 32);
   }
-  OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
-  OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy parseSetEndImm(OperandVector &);
+  OperandMatchResultTy parseShifterImm(OperandVector &);
+  OperandMatchResultTy parseRotImm(OperandVector &);
+  OperandMatchResultTy parseBitfield(OperandVector &);
+  OperandMatchResultTy parsePostIdxReg(OperandVector &);
+  OperandMatchResultTy parseAM3Offset(OperandVector &);
+  OperandMatchResultTy parseFPImm(OperandVector &);
+  OperandMatchResultTy parseVectorList(OperandVector &);
   OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
                                        SMLoc &EndLoc);
 
   // Asm Match Converter Methods
-  void cvtThumbMultiply(MCInst &Inst,
-                        const SmallVectorImpl<MCParsedAsmOperand*> &);
-  void cvtThumbBranches(MCInst &Inst,
-                        const SmallVectorImpl<MCParsedAsmOperand*> &);
-
-  bool validateInstruction(MCInst &Inst,
-                           const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
-  bool processInstruction(MCInst &Inst,
-                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
-  bool shouldOmitCCOutOperand(StringRef Mnemonic,
-                              SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-  bool shouldOmitPredicateOperand(StringRef Mnemonic,
-                              SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+  void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
+  void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+
+  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
+  bool processInstruction(MCInst &Inst, const OperandVector &Ops);
+  bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
+  bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+
 public:
   enum ARMMatchResultTy {
     Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -361,19 +349,17 @@ public:
 
   // Implementation of the MCTargetAsmParser interface:
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
-  bool
-  ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
-                   SMLoc NameLoc,
-                   SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                        SMLoc NameLoc, OperandVector &Operands) override;
   bool ParseDirective(AsmToken DirectiveID) override;
 
-  unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                       unsigned Kind) override;
   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
-                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               MCStreamer &Out, unsigned &ErrorInfo,
+                               OperandVector &Operands, MCStreamer &Out,
+                               unsigned &ErrorInfo,
                                bool MatchingInlineAsm) override;
   void onLabelParsed(MCSymbol *Symbol) override;
 };
@@ -545,8 +531,8 @@ class ARMOperand : public MCParsedAsmOperand {
     struct BitfieldOp Bitfield;
   };
 
-  ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
 public:
+  ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
   ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
     Kind = o.Kind;
     StartLoc = o.StartLoc;
@@ -2481,56 +2467,58 @@ public:
 
   void print(raw_ostream &OS) const override;
 
-  static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_ITCondMask);
+  static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_ITCondMask);
     Op->ITMask.Mask = Mask;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_CondCode);
+  static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC,
+                                                    SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_CondCode);
     Op->CC.Val = CC;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_CoprocNum);
+  static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_CoprocNum);
     Op->Cop.Val = CopVal;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_CoprocReg);
+  static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_CoprocReg);
     Op->Cop.Val = CopVal;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_CoprocOption);
+  static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S,
+                                                        SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_CoprocOption);
     Op->Cop.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
   }
 
-  static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_CCOut);
+  static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_CCOut);
     Op->Reg.RegNum = RegNum;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_Token);
+  static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_Token);
     Op->Tok.Data = Str.data();
     Op->Tok.Length = Str.size();
     Op->StartLoc = S;
@@ -2538,20 +2526,20 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_Register);
+  static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S,
+                                               SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_Register);
     Op->Reg.RegNum = RegNum;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
   }
 
-  static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
-                                           unsigned SrcReg,
-                                           unsigned ShiftReg,
-                                           unsigned ShiftImm,
-                                           SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+  static std::unique_ptr<ARMOperand>
+  CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+                        unsigned ShiftReg, unsigned ShiftImm, SMLoc S,
+                        SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_ShiftedRegister);
     Op->RegShiftedReg.ShiftTy = ShTy;
     Op->RegShiftedReg.SrcReg = SrcReg;
     Op->RegShiftedReg.ShiftReg = ShiftReg;
@@ -2561,11 +2549,10 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
-                                            unsigned SrcReg,
-                                            unsigned ShiftImm,
-                                            SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+  static std::unique_ptr<ARMOperand>
+  CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+                         unsigned ShiftImm, SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_ShiftedImmediate);
     Op->RegShiftedImm.ShiftTy = ShTy;
     Op->RegShiftedImm.SrcReg = SrcReg;
     Op->RegShiftedImm.ShiftImm = ShiftImm;
@@ -2574,9 +2561,9 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
-                                   SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+  static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm,
+                                                      SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_ShifterImmediate);
     Op->ShifterImm.isASR = isASR;
     Op->ShifterImm.Imm = Imm;
     Op->StartLoc = S;
@@ -2584,17 +2571,18 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+  static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S,
+                                                  SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_RotateImmediate);
     Op->RotImm.Imm = Imm;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
   }
 
-  static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
-                                    SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+  static std::unique_ptr<ARMOperand>
+  CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
     Op->Bitfield.LSB = LSB;
     Op->Bitfield.Width = Width;
     Op->StartLoc = S;
@@ -2602,8 +2590,8 @@ public:
     return Op;
   }
 
-  static ARMOperand *
-  CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs,
+  static std::unique_ptr<ARMOperand>
+  CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
                 SMLoc StartLoc, SMLoc EndLoc) {
     assert (Regs.size() > 0 && "RegList contains no registers?");
     KindTy Kind = k_RegisterList;
@@ -2617,7 +2605,7 @@ public:
     // Sort based on the register encoding values.
     array_pod_sort(Regs.begin(), Regs.end());
 
-    ARMOperand *Op = new ARMOperand(Kind);
+    auto Op = make_unique<ARMOperand>(Kind);
     for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator
            I = Regs.begin(), E = Regs.end(); I != E; ++I)
       Op->Registers.push_back(I->second);
@@ -2626,9 +2614,11 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
-                                      bool isDoubleSpaced, SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_VectorList);
+  static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum,
+                                                      unsigned Count,
+                                                      bool isDoubleSpaced,
+                                                      SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_VectorList);
     Op->VectorList.RegNum = RegNum;
     Op->VectorList.Count = Count;
     Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2637,10 +2627,10 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
-                                              bool isDoubleSpaced,
-                                              SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+  static std::unique_ptr<ARMOperand>
+  CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced,
+                           SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_VectorListAllLanes);
     Op->VectorList.RegNum = RegNum;
     Op->VectorList.Count = Count;
     Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2649,11 +2639,10 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
-                                             unsigned Index,
-                                             bool isDoubleSpaced,
-                                             SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+  static std::unique_ptr<ARMOperand>
+  CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index,
+                          bool isDoubleSpaced, SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_VectorListIndexed);
     Op->VectorList.RegNum = RegNum;
     Op->VectorList.Count = Count;
     Op->VectorList.LaneIndex = Index;
@@ -2663,33 +2652,30 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
-                                       MCContext &Ctx) {
-    ARMOperand *Op = new ARMOperand(k_VectorIndex);
+  static std::unique_ptr<ARMOperand>
+  CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+    auto Op = make_unique<ARMOperand>(k_VectorIndex);
     Op->VectorIndex.Val = Idx;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
   }
 
-  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_Immediate);
+  static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S,
+                                               SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_Immediate);
     Op->Imm.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
   }
 
-  static ARMOperand *CreateMem(unsigned BaseRegNum,
-                               const MCConstantExpr *OffsetImm,
-                               unsigned OffsetRegNum,
-                               ARM_AM::ShiftOpc ShiftType,
-                               unsigned ShiftImm,
-                               unsigned Alignment,
-                               bool isNegative,
-                               SMLoc S, SMLoc E,
-                               SMLoc AlignmentLoc = SMLoc()) {
-    ARMOperand *Op = new ARMOperand(k_Memory);
+  static std::unique_ptr<ARMOperand>
+  CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm,
+            unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType,
+            unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S,
+            SMLoc E, SMLoc AlignmentLoc = SMLoc()) {
+    auto Op = make_unique<ARMOperand>(k_Memory);
     Op->Memory.BaseRegNum = BaseRegNum;
     Op->Memory.OffsetImm = OffsetImm;
     Op->Memory.OffsetRegNum = OffsetRegNum;
@@ -2703,11 +2689,10 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
-                                      ARM_AM::ShiftOpc ShiftTy,
-                                      unsigned ShiftImm,
-                                      SMLoc S, SMLoc E) {
-    ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+  static std::unique_ptr<ARMOperand>
+  CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy,
+                   unsigned ShiftImm, SMLoc S, SMLoc E) {
+    auto Op = make_unique<ARMOperand>(k_PostIndexRegister);
     Op->PostIdxReg.RegNum = RegNum;
     Op->PostIdxReg.isAdd = isAdd;
     Op->PostIdxReg.ShiftTy = ShiftTy;
@@ -2717,33 +2702,35 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
+  static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt,
+                                                         SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_MemBarrierOpt);
     Op->MBOpt.Val = Opt;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt,
-                                              SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt);
+  static std::unique_ptr<ARMOperand>
+  CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt);
     Op->ISBOpt.Val = Opt;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_ProcIFlags);
+  static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags,
+                                                      SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_ProcIFlags);
     Op->IFlags.Val = IFlags;
     Op->StartLoc = S;
     Op->EndLoc = S;
     return Op;
   }
 
-  static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
-    ARMOperand *Op = new ARMOperand(k_MSRMask);
+  static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_MSRMask);
     Op->MMask.Val = MMask;
     Op->StartLoc = S;
     Op->EndLoc = S;
@@ -2947,8 +2934,7 @@ int ARMAsmParser::tryParseRegister() {
 // occurs, return -1. An irrecoverable error is one where tokens have been
 // consumed in the process of trying to parse the shifter (i.e., when it is
 // indeed a shifter operand, but malformed).
-int ARMAsmParser::tryParseShiftRegister(
-                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
@@ -2972,7 +2958,8 @@ int ARMAsmParser::tryParseShiftRegister(
   // The source register for the shift has already been added to the
   // operand list, so we need to pop it off and combine it into the shifted
   // register operand instead.
-  std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+  std::unique_ptr<ARMOperand> PrevOp(
+      (ARMOperand *)Operands.pop_back_val().release());
   if (!PrevOp->isReg())
     return Error(PrevOp->getStartLoc(), "shift must be of a register");
   int SrcReg = PrevOp->getReg();
@@ -3049,8 +3036,7 @@ int ARMAsmParser::tryParseShiftRegister(
 ///
 /// TODO this is likely to change to allow different register types and or to
 /// parse for a specific register type.
-bool ARMAsmParser::
-tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
   const AsmToken &RegTok = Parser.getTok();
   int RegNo = tryParseRegister();
   if (RegNo == -1)
@@ -3096,17 +3082,25 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 }
 
 /// MatchCoprocessorOperandName - Try to parse an coprocessor related
-/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
-/// "c5", ...
+/// instruction with a symbolic operand name.
+/// We accept "crN" syntax for GAS compatibility.
+/// <operand-name> ::= <prefix><number>
+/// If CoprocOp is 'c', then:
+///   <prefix> ::= c | cr
+/// If CoprocOp is 'p', then:
+///   <prefix> ::= p
+/// <number> ::= integer in range [0, 15]
 static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
   // Use the same layout as the tablegen'erated register name matcher. Ugly,
   // but efficient.
+  if (Name.size() < 2 || Name[0] != CoprocOp)
+    return -1;
+  Name = Name.drop_front(CoprocOp == 'c' && Name[1] == 'r' ? 2 : 1);
+
   switch (Name.size()) {
   default: return -1;
-  case 2:
-    if (Name[0] != CoprocOp)
-      return -1;
-    switch (Name[1]) {
+  case 1:
+    switch (Name[0]) {
     default:  return -1;
     case '0': return 0;
     case '1': return 1;
@@ -3119,10 +3113,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
     case '8': return 8;
     case '9': return 9;
     }
-  case 3:
-    if (Name[0] != CoprocOp || Name[1] != '1')
+  case 2:
+    if (Name[0] != '1')
       return -1;
-    switch (Name[2]) {
+    switch (Name[1]) {
     default:  return -1;
     // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
     case '0': return CoprocOp == 'p'? -1: 10;
@@ -3136,8 +3130,8 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
 }
 
 /// parseITCondCode - Try to parse a condition code for an IT instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseITCondCode(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (!Tok.is(AsmToken::Identifier))
@@ -3173,8 +3167,8 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
 /// token must be an Identifier when called, and if it is a coprocessor
 /// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
@@ -3192,8 +3186,8 @@ parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
 /// token must be an Identifier when called, and if it is a coprocessor
 /// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
@@ -3210,8 +3204,8 @@ parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
 /// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
 /// coproc_option : '{' imm0_255 '}'
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
 
   // If this isn't a '{', this isn't a coprocessor immediate operand.
@@ -3288,8 +3282,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
 }
 
 /// Parse a register list.
-bool ARMAsmParser::
-parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
   assert(Parser.getTok().is(AsmToken::LCurly) &&
          "Token is not a Left Curly Brace");
   SMLoc S = Parser.getTok().getLoc();
@@ -3470,8 +3463,8 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
 }
 
 // parse a vector register list
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseVectorList(OperandVector &Operands) {
   VectorLaneTy LaneKind;
   unsigned LaneIndex;
   SMLoc S = Parser.getTok().getLoc();
@@ -3721,8 +3714,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 }
 
 /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   unsigned Opt;
@@ -3792,8 +3785,8 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 }
 
 /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   unsigned Opt;
@@ -3843,8 +3836,8 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
 
 /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (!Tok.is(AsmToken::Identifier)) 
@@ -3877,8 +3870,8 @@ parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 }
 
 /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (!Tok.is(AsmToken::Identifier))
@@ -4005,9 +3998,9 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
-            int Low, int High) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
+                          int High) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier)) {
     Error(Parser.getTok().getLoc(), Op + " operand expected.");
@@ -4053,8 +4046,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
   return MatchOperand_Success;
 }
 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
   const AsmToken &Tok = Parser.getTok();
   SMLoc S = Tok.getLoc();
   if (Tok.isNot(AsmToken::Identifier)) {
@@ -4082,8 +4075,8 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 ///     lsl #n  'n' in [0,31]
 ///     asr #n  'n' in [1,32]
 ///             n == 32 encoded as n == 0.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseShifterImm(OperandVector &Operands) {
   const AsmToken &Tok = Parser.getTok();
   SMLoc S = Tok.getLoc();
   if (Tok.isNot(AsmToken::Identifier)) {
@@ -4152,8 +4145,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
 /// of instructions. Legal values are:
 ///     ror #n  'n' in {0, 8, 16, 24}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseRotImm(OperandVector &Operands) {
   const AsmToken &Tok = Parser.getTok();
   SMLoc S = Tok.getLoc();
   if (Tok.isNot(AsmToken::Identifier))
@@ -4198,8 +4191,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBitfield(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   // The bitfield descriptor is really two operands, the LSB and the width.
   if (Parser.getTok().isNot(AsmToken::Hash) &&
@@ -4266,8 +4259,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
   // Check for a post-index addressing register operand. Specifically:
   // postidx_reg := '+' register {, shift}
   //              | '-' register {, shift}
@@ -4315,8 +4308,8 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
   // Check for a post-index addressing register operand. Specifically:
   // am3offset := '+' register
   //              | '-' register
@@ -4388,26 +4381,24 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// Convert parsed operands to MCInst.  Needed here because this instruction
 /// only has two register operands, but multiplication is commutative so
 /// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN".
-void ARMAsmParser::
-cvtThumbMultiply(MCInst &Inst,
-           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
-  ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+void ARMAsmParser::cvtThumbMultiply(MCInst &Inst,
+                                    const OperandVector &Operands) {
+  static_cast<ARMOperand &>(*Operands[3]).addRegOperands(Inst, 1);
+  static_cast<ARMOperand &>(*Operands[1]).addCCOutOperands(Inst, 1);
   // If we have a three-operand form, make sure to set Rn to be the operand
   // that isn't the same as Rd.
   unsigned RegOp = 4;
   if (Operands.size() == 6 &&
-      ((ARMOperand*)Operands[4])->getReg() ==
-        ((ARMOperand*)Operands[3])->getReg())
+      static_cast<ARMOperand &>(*Operands[4]).getReg() ==
+          static_cast<ARMOperand &>(*Operands[3]).getReg())
     RegOp = 5;
-  ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+  static_cast<ARMOperand &>(*Operands[RegOp]).addRegOperands(Inst, 1);
   Inst.addOperand(Inst.getOperand(0));
-  ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+  static_cast<ARMOperand &>(*Operands[2]).addCondCodeOperands(Inst, 2);
 }
 
-void ARMAsmParser::
-cvtThumbBranches(MCInst &Inst,
-           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
+                                    const OperandVector &Operands) {
   int CondOp = -1, ImmOp = -1;
   switch(Inst.getOpcode()) {
     case ARM::tB:
@@ -4430,7 +4421,7 @@ cvtThumbBranches(MCInst &Inst,
   } else {
     // outside IT blocks we can only have unconditional branches with AL
     // condition code or conditional branches with non-AL condition code
-    unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+    unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
     switch(Inst.getOpcode()) {
       case ARM::tB:
       case ARM::tBcc: 
@@ -4447,27 +4438,26 @@ cvtThumbBranches(MCInst &Inst,
   switch(Inst.getOpcode()) {
     // classify tB as either t2B or t1B based on range of immediate operand
     case ARM::tB: {
-      ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
-      if(!op->isSignedOffset<11, 1>() && isThumbTwo()) 
+      ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+      if (!op.isSignedOffset<11, 1>() && isThumbTwo())
         Inst.setOpcode(ARM::t2B);
       break;
     }
     // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
     case ARM::tBcc: {
-      ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
-      if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+      ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+      if (!op.isSignedOffset<8, 1>() && isThumbTwo())
         Inst.setOpcode(ARM::t2Bcc);
       break;
     }
   }
-  ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
-  ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
+  ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1);
+  ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
 }
 
 /// Parse an ARM memory expression, return false if successful else return true
 /// or an error.  The first token must be a '[' when called.
-bool ARMAsmParser::
-parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseMemory(OperandVector &Operands) {
   SMLoc S, E;
   assert(Parser.getTok().is(AsmToken::LBrac) &&
          "Token is not a Left Bracket");
@@ -4717,8 +4707,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
 }
 
 /// parseFPImm - A floating point immediate expression operand.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseFPImm(OperandVector &Operands) {
   // Anything that can accept a floating point constant as an operand
   // needs to go through here, as the regular parseExpression is
   // integer only.
@@ -4744,12 +4734,12 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // integer constant. Make sure we don't try to parse an FPImm
   // for these:
   // vmov.i{8|16|32|64} <dreg|qreg>, #imm
-  ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
-  bool isVmovf = TyOp->isToken() && (TyOp->getToken() == ".f32" ||
-                                     TyOp->getToken() == ".f64");
-  ARMOperand *Mnemonic = static_cast<ARMOperand*>(Operands[0]);
-  bool isFconst = Mnemonic->isToken() && (Mnemonic->getToken() == "fconstd" ||
-                                          Mnemonic->getToken() == "fconsts");
+  ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]);
+  bool isVmovf = TyOp.isToken() &&
+                 (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64");
+  ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]);
+  bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
+                                         Mnemonic.getToken() == "fconsts");
   if (!(isVmovf || isFconst))
     return MatchOperand_NoMatch;
 
@@ -4798,8 +4788,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
 /// Parse a arm instruction operand.  For now this parses the operand regardless
 /// of the mnemonic.
-bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                                StringRef Mnemonic) {
+bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
   SMLoc S, E;
 
   // Check if the current operand has a custom associated parser, if so, try to
@@ -5125,7 +5114,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
 }
 
 bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
-                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+                                          OperandVector &Operands) {
   // FIXME: This is all horribly hacky. We really need a better way to deal
   // with optional operands like this in the matcher table.
 
@@ -5138,17 +5127,17 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   // conditionally adding the cc_out in the first place because we need
   // to check the type of the parsed immediate operand.
   if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
-      !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
-      static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+      !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() &&
+      static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
     return true;
 
   // Register-register 'add' for thumb does not have a cc_out operand
   // when there are only two register operands.
   if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+      static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
     return true;
   // Register-register 'add' for thumb does not have a cc_out operand
   // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
@@ -5156,13 +5145,12 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   // that can handle a different range and has a cc_out operand.
   if (((isThumb() && Mnemonic == "add") ||
        (isThumbTwo() && Mnemonic == "sub")) &&
-      Operands.size() == 6 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
-      ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
-       static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+      Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+      ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) ||
+       static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4()))
     return true;
   // For Thumb2, add/sub immediate does not have a cc_out operand for the
   // imm0_4095 variant. That's the least-preferred variant when
@@ -5170,23 +5158,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   // should remove the cc_out operand, we have to explicitly check that
   // it's not one of the other variants. Ugh.
   if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
-      Operands.size() == 6 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
-      static_cast<ARMOperand*>(Operands[5])->isImm()) {
+      Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[5]).isImm()) {
     // Nest conditions rather than one big 'if' statement for readability.
     //
     // If both registers are low, we're in an IT block, and the immediate is
     // in range, we should use encoding T1 instead, which has a cc_out.
     if (inITBlock() &&
-        isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
-        isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
-        static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+        isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) &&
+        isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) &&
+        static_cast<ARMOperand &>(*Operands[5]).isImm0_7())
       return false;
     // Check against T3. If the second register is the PC, this is an
     // alternate form of ADR, which uses encoding T4, so check for that too.
-    if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
-        static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+    if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC &&
+        static_cast<ARMOperand &>(*Operands[5]).isT2SOImm())
       return false;
 
     // Otherwise, we use encoding T4, which does not have a cc_out
@@ -5198,35 +5185,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
   // use the 16-bit encoding or not.
   if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
-      static_cast<ARMOperand*>(Operands[5])->isReg() &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+      static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[5]).isReg() &&
       // If the registers aren't low regs, the destination reg isn't the
       // same as one of the source regs, or the cc_out operand is zero
       // outside of an IT block, we have to use the 32-bit encoding, so
       // remove the cc_out operand.
-      (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
-       !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
-       !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
-       !inITBlock() ||
-       (static_cast<ARMOperand*>(Operands[3])->getReg() !=
-        static_cast<ARMOperand*>(Operands[5])->getReg() &&
-        static_cast<ARMOperand*>(Operands[3])->getReg() !=
-        static_cast<ARMOperand*>(Operands[4])->getReg())))
+      (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) ||
+       !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+                            static_cast<ARMOperand &>(*Operands[5]).getReg() &&
+                        static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+                            static_cast<ARMOperand &>(*Operands[4]).getReg())))
     return true;
 
   // Also check the 'mul' syntax variant that doesn't specify an explicit
   // destination register.
   if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+      static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
       // If the registers aren't low regs  or the cc_out operand is zero
       // outside of an IT block, we have to use the 32-bit encoding, so
       // remove the cc_out operand.
-      (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
-       !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+      (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
        !inITBlock()))
     return true;
 
@@ -5239,32 +5225,32 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   // anyway.
   if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
       (Operands.size() == 5 || Operands.size() == 6) &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
-      static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
-      (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+      static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP &&
+      static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+      (static_cast<ARMOperand &>(*Operands[4]).isImm() ||
        (Operands.size() == 6 &&
-        static_cast<ARMOperand*>(Operands[5])->isImm())))
+        static_cast<ARMOperand &>(*Operands[5]).isImm())))
     return true;
 
   return false;
 }
 
-bool ARMAsmParser::shouldOmitPredicateOperand(
-    StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
+                                              OperandVector &Operands) {
   // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
   unsigned RegIdx = 3;
   if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
-      static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
-    if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
-        static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+      static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+    if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+        static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
       RegIdx = 4;
 
-    if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
-        (ARMMCRegisterClasses[ARM::DPRRegClassID]
-             .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
-         ARMMCRegisterClasses[ARM::QPRRegClassID]
-             .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+    if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
+        (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+             static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) ||
+         ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+             static_cast<ARMOperand &>(*Operands[RegIdx]).getReg())))
       return true;
   }
   return false;
@@ -5309,8 +5295,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst,
 
 /// Parse an arm instruction mnemonic followed by its operands.
 bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
-                                    SMLoc NameLoc,
-                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+                                    SMLoc NameLoc, OperandVector &Operands) {
   // FIXME: Can this be done via tablegen in some fashion?
   bool RequireVFPRegisterListCheck;
   bool AcceptSinglePrecisionOnly;
@@ -5489,12 +5474,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   Parser.Lex(); // Consume the EndOfStatement
 
   if (RequireVFPRegisterListCheck) {
-    ARMOperand *Op = static_cast<ARMOperand*>(Operands.back());
-    if (AcceptSinglePrecisionOnly && !Op->isSPRRegList())
-      return Error(Op->getStartLoc(),
+    ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
+    if (AcceptSinglePrecisionOnly && !Op.isSPRRegList())
+      return Error(Op.getStartLoc(),
                    "VFP/Neon single precision register expected");
-    if (AcceptDoublePrecisionOnly && !Op->isDPRRegList())
-      return Error(Op->getStartLoc(),
+    if (AcceptDoublePrecisionOnly && !Op.isDPRRegList())
+      return Error(Op.getStartLoc(),
                    "VFP/Neon double precision register expected");
   }
 
@@ -5505,20 +5490,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // try to remove a cc_out operand that was explicitly set on the the
   // mnemonic, of course (CarrySetting == true). Reason number #317 the
   // table driven matcher doesn't fit well with the ARM instruction set.
-  if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
-    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+  if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands))
     Operands.erase(Operands.begin() + 1);
-    delete Op;
-  }
 
   // Some instructions have the same mnemonic, but don't always
   // have a predicate. Distinguish them here and delete the
   // predicate if needed.
-  if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
-    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+  if (shouldOmitPredicateOperand(Mnemonic, Operands))
     Operands.erase(Operands.begin() + 1);
-    delete Op;
-  }
 
   // ARM mode 'blx' need special handling, as the register operand version
   // is predicable, but the label operand version is not. So, we can't rely
@@ -5526,11 +5505,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // a k_CondCode operand in the list. If we're trying to match the label
   // version, remove the k_CondCode operand here.
   if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
-      static_cast<ARMOperand*>(Operands[2])->isImm()) {
-    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+      static_cast<ARMOperand &>(*Operands[2]).isImm())
     Operands.erase(Operands.begin() + 1);
-    delete Op;
-  }
 
   // Adjust operands of ldrexd/strexd to MCK_GPRPair.
   // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
@@ -5543,53 +5519,50 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
        Mnemonic == "stlexd")) {
     bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
     unsigned Idx = isLoad ? 2 : 3;
-    ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
-    ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+    ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+    ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
 
     const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
     // Adjust only if Op1 and Op2 are GPRs.
-    if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
-        MRC.contains(Op2->getReg())) {
-      unsigned Reg1 = Op1->getReg();
-      unsigned Reg2 = Op2->getReg();
+    if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+        MRC.contains(Op2.getReg())) {
+      unsigned Reg1 = Op1.getReg();
+      unsigned Reg2 = Op2.getReg();
       unsigned Rt = MRI->getEncodingValue(Reg1);
       unsigned Rt2 = MRI->getEncodingValue(Reg2);
 
       // Rt2 must be Rt + 1 and Rt must be even.
       if (Rt + 1 != Rt2 || (Rt & 1)) {
-        Error(Op2->getStartLoc(), isLoad ?
-            "destination operands must be sequential" :
-            "source operands must be sequential");
+        Error(Op2.getStartLoc(), isLoad
+                                     ? "destination operands must be sequential"
+                                     : "source operands must be sequential");
         return true;
       }
       unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
           &(MRI->getRegClass(ARM::GPRPairRegClassID)));
-      Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
-      Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
-            NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
-      delete Op1;
-      delete Op2;
+      Operands[Idx] =
+          ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+      Operands.erase(Operands.begin() + Idx + 1);
     }
   }
 
   // GNU Assembler extension (compatibility)
   if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
-    ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]);
-    ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]);
-    if (Op3->isMem()) {
-      assert(Op2->isReg() && "expected register argument");
+    ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+    ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+    if (Op3.isMem()) {
+      assert(Op2.isReg() && "expected register argument");
 
       unsigned SuperReg = MRI->getMatchingSuperReg(
-          Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+          Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
 
       assert(SuperReg && "expected register pair");
 
       unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
 
-      Operands.insert(Operands.begin() + 3,
-                      ARMOperand::CreateReg(PairedReg,
-                                            Op2->getStartLoc(),
-                                            Op2->getEndLoc()));
+      Operands.insert(
+          Operands.begin() + 3,
+          ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc()));
     }
   }
 
@@ -5599,19 +5572,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // so the Mnemonic is the original name "subs" and delete the predicate
   // operand so it will match the table entry.
   if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 &&
-      static_cast<ARMOperand*>(Operands[3])->isReg() &&
-      static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC &&
-      static_cast<ARMOperand*>(Operands[4])->isReg() &&
-      static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR &&
-      static_cast<ARMOperand*>(Operands[5])->isImm()) {
-    ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]);
-    Operands.erase(Operands.begin());
-    delete Op0;
-    Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc));
-
-    ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]);
+      static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC &&
+      static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+      static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR &&
+      static_cast<ARMOperand &>(*Operands[5]).isImm()) {
+    Operands.front() = ARMOperand::CreateToken(Name, NameLoc);
     Operands.erase(Operands.begin() + 1);
-    delete Op1;
   }
   return false;
 }
@@ -5657,9 +5624,8 @@ static bool instIsBreakpoint(const MCInst &Inst) {
 }
 
 // FIXME: We would really like to be able to tablegen'erate this.
-bool ARMAsmParser::
-validateInstruction(MCInst &Inst,
-                    const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::validateInstruction(MCInst &Inst,
+                                       const OperandVector &Operands) {
   const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
   SMLoc Loc = Operands[0]->getStartLoc();
 
@@ -5682,7 +5648,7 @@ validateInstruction(MCInst &Inst,
       // Find the condition code Operand to get its SMLoc information.
       SMLoc CondLoc;
       for (unsigned I = 1; I < Operands.size(); ++I)
-        if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+        if (static_cast<ARMOperand &>(*Operands[I]).isCondCode())
           CondLoc = Operands[I]->getStartLoc();
       return Error(CondLoc, "incorrect condition in IT block; got '" +
                    StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
@@ -5782,8 +5748,8 @@ validateInstruction(MCInst &Inst,
     // in the register list.
     unsigned Rn = Inst.getOperand(0).getReg();
     bool HasWritebackToken =
-      (static_cast<ARMOperand*>(Operands[3])->isToken() &&
-       static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+        (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+         static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
     bool ListContainsBase;
     if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
       return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
@@ -5843,11 +5809,10 @@ validateInstruction(MCInst &Inst,
     // this first statement is always true for the new Inst.  Essentially, the
     // destination is unconditionally copied into the second source operand
     // without checking to see if it matches what we actually parsed.
-    if (Operands.size() == 6 &&
-        (((ARMOperand*)Operands[3])->getReg() !=
-         ((ARMOperand*)Operands[5])->getReg()) &&
-        (((ARMOperand*)Operands[3])->getReg() !=
-         ((ARMOperand*)Operands[4])->getReg())) {
+    if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() !=
+                                 ((ARMOperand &)*Operands[5]).getReg()) &&
+        (((ARMOperand &)*Operands[3]).getReg() !=
+         ((ARMOperand &)*Operands[4]).getReg())) {
       return Error(Operands[3]->getStartLoc(),
                    "destination register must match source register");
     }
@@ -5900,23 +5865,23 @@ validateInstruction(MCInst &Inst,
   }
   // Final range checking for Thumb unconditional branch instructions.
   case ARM::tB:
-    if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+    if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
       return Error(Operands[2]->getStartLoc(), "branch target out of range");
     break;
   case ARM::t2B: {
     int op = (Operands[2]->isImm()) ? 2 : 3;
-    if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+    if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>())
       return Error(Operands[op]->getStartLoc(), "branch target out of range");
     break;
   }
   // Final range checking for Thumb conditional branch instructions.
   case ARM::tBcc:
-    if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+    if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>())
       return Error(Operands[2]->getStartLoc(), "branch target out of range");
     break;
   case ARM::t2Bcc: {
     int Op = (Operands[2]->isImm()) ? 2 : 3;
-    if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+    if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>())
       return Error(Operands[Op]->getStartLoc(), "branch target out of range");
     break;
   }
@@ -5931,19 +5896,19 @@ validateInstruction(MCInst &Inst,
     // lead to bugs that are difficult to find since this is an easy mistake
     // to make.
     int i = (Operands[3]->isImm()) ? 3 : 4;
-    ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]);
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+    ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]);
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
     if (CE) break;
-    const MCExpr *E = dyn_cast<MCExpr>(Op->getImm());
+    const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
     if (!E) break;
     const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
     if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
-                       ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) {
-      return Error(Op->getStartLoc(),
-	     "immediate expression for mov requires :lower16: or :upper16");
-      break;
-    }
-    }
+                       ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16))
+      return Error(
+          Op.getStartLoc(),
+          "immediate expression for mov requires :lower16: or :upper16");
+    break;
+  }
   }
 
   return false;
@@ -6205,9 +6170,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
   }
 }
 
-bool ARMAsmParser::
-processInstruction(MCInst &Inst,
-                   const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::processInstruction(MCInst &Inst,
+                                      const OperandVector &Operands) {
   switch (Inst.getOpcode()) {
   // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
   case ARM::LDRT_POST:
@@ -6264,8 +6228,8 @@ processInstruction(MCInst &Inst,
     // Select the narrow version if the immediate will fit.
     if (Inst.getOperand(1).getImm() > 0 &&
         Inst.getOperand(1).getImm() <= 0xff &&
-        !(static_cast<ARMOperand*>(Operands[2])->isToken() &&
-         static_cast<ARMOperand*>(Operands[2])->getToken() == ".w"))
+        !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
+          static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
       Inst.setOpcode(ARM::tLDRpci);
     else
       Inst.setOpcode(ARM::t2LDRpci);
@@ -7355,8 +7319,8 @@ processInstruction(MCInst &Inst,
     if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
         Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
         Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
-        !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
-         static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+        !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+          static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
       unsigned NewOpc;
       switch (Inst.getOpcode()) {
       default: llvm_unreachable("unexpected opcode");
@@ -7559,7 +7523,7 @@ processInstruction(MCInst &Inst,
   case ARM::LDMIA_UPD:
     // If this is a load of a single register via a 'pop', then we should use
     // a post-indexed LDR instruction instead, per the ARM ARM.
-    if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+    if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" &&
         Inst.getNumOperands() == 5) {
       MCInst TmpInst;
       TmpInst.setOpcode(ARM::LDR_POST_IMM);
@@ -7577,7 +7541,7 @@ processInstruction(MCInst &Inst,
   case ARM::STMDB_UPD:
     // If this is a store of a single register via a 'push', then we should use
     // a pre-indexed STR instruction instead, per the ARM ARM.
-    if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+    if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" &&
         Inst.getNumOperands() == 5) {
       MCInst TmpInst;
       TmpInst.setOpcode(ARM::STR_PRE_IMM);
@@ -7593,7 +7557,7 @@ processInstruction(MCInst &Inst,
   case ARM::t2ADDri12:
     // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
     // mnemonic was used (not "addw"), encoding T3 is preferred.
-    if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+    if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" ||
         ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
       break;
     Inst.setOpcode(ARM::t2ADDri);
@@ -7602,7 +7566,7 @@ processInstruction(MCInst &Inst,
   case ARM::t2SUBri12:
     // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
     // mnemonic was used (not "subw"), encoding T3 is preferred.
-    if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+    if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" ||
         ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
       break;
     Inst.setOpcode(ARM::t2SUBri);
@@ -7638,9 +7602,9 @@ processInstruction(MCInst &Inst,
         !isARMLowRegister(Inst.getOperand(0).getReg()) ||
         (unsigned)Inst.getOperand(2).getImm() > 255 ||
         ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
-        (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
-        (static_cast<ARMOperand*>(Operands[3])->isToken() &&
-         static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+         (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+        (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+         static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
       break;
     MCInst TmpInst;
     TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -7661,8 +7625,8 @@ processInstruction(MCInst &Inst,
     // 'as' behaviour. Make sure the wide encoding wasn't explicit.
     if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
         Inst.getOperand(5).getReg() != 0 ||
-        (static_cast<ARMOperand*>(Operands[3])->isToken() &&
-         static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+        (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+         static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
       break;
     MCInst TmpInst;
     TmpInst.setOpcode(ARM::tADDhirr);
@@ -7719,8 +7683,8 @@ processInstruction(MCInst &Inst,
     // an error in validateInstruction().
     unsigned Rn = Inst.getOperand(0).getReg();
     bool hasWritebackToken =
-      (static_cast<ARMOperand*>(Operands[3])->isToken() &&
-       static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+        (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+         static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
     bool listContainsBase;
     if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
         (!listContainsBase && !hasWritebackToken) ||
@@ -7782,10 +7746,10 @@ processInstruction(MCInst &Inst,
     if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
         (unsigned)Inst.getOperand(1).getImm() <= 255 &&
         ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
-         Inst.getOperand(4).getReg() == ARM::CPSR) ||
-        (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
-        (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
-         static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+          Inst.getOperand(4).getReg() == ARM::CPSR) ||
+         (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+        (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+         static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
       // The operands aren't in the same order for tMOVi8...
       MCInst TmpInst;
       TmpInst.setOpcode(ARM::tMOVi8);
@@ -7806,8 +7770,8 @@ processInstruction(MCInst &Inst,
         isARMLowRegister(Inst.getOperand(1).getReg()) &&
         Inst.getOperand(2).getImm() == ARMCC::AL &&
         Inst.getOperand(4).getReg() == ARM::CPSR &&
-        (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
-         static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+        (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+         static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
       // The operands aren't the same for tMOV[S]r... (no cc_out)
       MCInst TmpInst;
       TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -7829,8 +7793,8 @@ processInstruction(MCInst &Inst,
     if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
         isARMLowRegister(Inst.getOperand(1).getReg()) &&
         Inst.getOperand(2).getImm() == 0 &&
-        (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
-         static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+        (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+         static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
       unsigned NewOpc;
       switch (Inst.getOpcode()) {
       default: llvm_unreachable("Illegal opcode!");
@@ -7942,9 +7906,10 @@ processInstruction(MCInst &Inst,
          isARMLowRegister(Inst.getOperand(2).getReg())) &&
         Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
         ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
-         (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && 
-        (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
-         !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+         (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+        (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+         !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+             ".w"))) {
       unsigned NewOpc;
       switch (Inst.getOpcode()) {
         default: llvm_unreachable("unexpected opcode");
@@ -7981,9 +7946,10 @@ processInstruction(MCInst &Inst,
         (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
          Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
         ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
-         (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && 
-        (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
-         !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+         (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+        (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+         !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+             ".w"))) {
       unsigned NewOpc;
       switch (Inst.getOpcode()) {
         default: llvm_unreachable("unexpected opcode");
@@ -8063,11 +8029,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
 }
 
 static const char *getSubtargetFeatureName(unsigned Val);
-bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
-                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out, unsigned &ErrorInfo,
-                        bool MatchingInlineAsm) {
+bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                           OperandVector &Operands,
+                                           MCStreamer &Out, unsigned &ErrorInfo,
+                                           bool MatchingInlineAsm) {
   MCInst Inst;
   unsigned MatchResult;
 
@@ -8136,7 +8101,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction");
 
-      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+      ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
       if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
     }
 
@@ -8144,7 +8109,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   }
   case Match_MnemonicFail:
     return Error(IDLoc, "invalid instruction",
-                 ((ARMOperand*)Operands[0])->getLocRange());
+                 ((ARMOperand &)*Operands[0]).getLocRange());
   case Match_RequiresNotITBlock:
     return Error(IDLoc, "flag setting instruction only valid outside IT block");
   case Match_RequiresITBlock:
@@ -8154,12 +8119,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_RequiresThumb2:
     return Error(IDLoc, "instruction variant requires Thumb2");
   case Match_ImmRange0_15: {
-    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+    SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
     if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
     return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
   }
   case Match_ImmRange0_239: {
-    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+    SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
     if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
     return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
   }
@@ -8175,7 +8140,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_DupAlignedMemoryRequires64or128:
   case Match_AlignedMemoryRequires64or128or256:
   {
-    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc();
+    SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc();
     if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
     switch (MatchResult) {
       default:
@@ -8923,28 +8888,22 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
   }
 
   // RAII object to make sure parsed operands are deleted.
-  struct CleanupObject {
-    SmallVector<MCParsedAsmOperand *, 1> Operands;
-    ~CleanupObject() {
-      for (unsigned I = 0, E = Operands.size(); I != E; ++I)
-        delete Operands[I];
-    }
-  } CO;
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
 
   // Parse the register list
-  if (parseRegisterList(CO.Operands))
+  if (parseRegisterList(Operands))
     return false;
-  ARMOperand *Op = (ARMOperand*)CO.Operands[0];
-  if (!IsVector && !Op->isRegList()) {
+  ARMOperand &Op = (ARMOperand &)*Operands[0];
+  if (!IsVector && !Op.isRegList()) {
     Error(L, ".save expects GPR registers");
     return false;
   }
-  if (IsVector && !Op->isDPRRegList()) {
+  if (IsVector && !Op.isDPRRegList()) {
     Error(L, ".vsave expects DPR registers");
     return false;
   }
 
-  getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
+  getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
   return false;
 }
 
@@ -9468,23 +9427,23 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
 
 // Define this matcher function after the auto-generated include so we
 // have the match class enum definitions.
-unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
                                                   unsigned Kind) {
-  ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+  ARMOperand &Op = static_cast<ARMOperand &>(AsmOp);
   // If the kind is a token for a literal immediate, check if our asm
   // operand matches. This is for InstAliases which have a fixed-value
   // immediate in the syntax.
   switch (Kind) {
   default: break;
   case MCK__35_0:
-    if (Op->isImm())
-      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()))
+    if (Op.isImm())
+      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
         if (CE->getValue() == 0)
           return Match_Success;
     break;
   case MCK_ARMSOImm:
-    if (Op->isImm()) {
-      const MCExpr *SOExpr = Op->getImm();
+    if (Op.isImm()) {
+      const MCExpr *SOExpr = Op.getImm();
       int64_t Value;
       if (!SOExpr->EvaluateAsAbsolute(Value))
         return Match_Success;
@@ -9493,8 +9452,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
     }
     break;
   case MCK_GPRPair:
-    if (Op->isReg() &&
-        MRI->getRegClass(ARM::GPRRegClassID).contains(Op->getReg()))
+    if (Op.isReg() &&
+        MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
       return Match_Success;
     break;
   }
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e4b785d..228fb57 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1092,13 +1092,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
   if (isSub) {
     O << ", "
       << markup("<imm:")
-      << "#-" << -OffImm
+      << "#-" << formatImm(-OffImm)
       << markup(">");
   }
   else if (AlwaysPrintImm0 || OffImm > 0) {
     O << ", "
       << markup("<imm:")
-      << "#" << OffImm
+      << "#" << formatImm(OffImm)
       << markup(">");
   }
   O << "]" << markup(">");
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 42a1cbb..1686d76 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -295,7 +295,12 @@ namespace ARMII {
 
     /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
     /// just that part of the flag set.
-    MO_OPTION_MASK = 0x7f,
+    MO_OPTION_MASK = 0x3f,
+
+    /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+    /// to the symbol is for an import stub.  This is used for DLL import
+    /// storage class indication on Windows.
+    MO_DLLIMPORT = 0x40,
 
     /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
     /// represents a symbol which, if indirect, will get special Darwin mangling
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index a4d13ed..7b5d8b0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -992,7 +992,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
     return;
 
   const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
-  if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+  unsigned Type = MCELF::GetType(SD);
+  if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
     Streamer.EmitThumbFunc(Symbol);
 }
 
@@ -1160,7 +1161,7 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
   const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
       PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
 
-  AddValueSymbols(PersonalityRef);
+  visitUsedExpr(*PersonalityRef);
   MCDataFragment *DF = getOrCreateDataFragment();
   DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
                                             PersonalityRef,
@@ -1332,6 +1333,12 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
   return S;
 }
 
+MCStreamer *createARMNullStreamer(MCContext &Ctx) { // ARM null streamer
+  MCStreamer *S = llvm::createNullStreamer(Ctx);
+  new ARMTargetStreamer(*S); // presumably attaches itself to *S (verify)
+  return S;
+}
+
   MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                       raw_ostream &OS, MCCodeEmitter *Emitter,
                                       bool RelaxAll, bool NoExecStack,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5b51a52..b8ee555 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
   // we have a movt or a movw, but that led to misleadingly results.
   // This is now disallowed in the the AsmParser in validateInstruction()
   // so this should never happen.
-  assert(0 && "expression without :upper16: or :lower16:");
-  return 0;
+  llvm_unreachable("expression without :upper16: or :lower16:");
 }
 
 uint32_t ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 87ea875..e545e3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -41,33 +41,6 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
   return false;
 }
 
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
-  switch (Value->getKind()) {
-  case MCExpr::Target:
-    llvm_unreachable("Can't handle nested target expr!");
-
-  case MCExpr::Constant:
-    break;
-
-  case MCExpr::Binary: {
-    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
-    AddValueSymbols_(BE->getLHS(), Asm);
-    AddValueSymbols_(BE->getRHS(), Asm);
-    break;
-  }
-
-  case MCExpr::SymbolRef:
-    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
-    break;
-
-  case MCExpr::Unary:
-    AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
-    break;
-  }
-}
-
-void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
-  AddValueSymbols_(getSubExpr(), Asm);
+void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+  Streamer.visitUsedExpr(*getSubExpr()); // delegate to the sub-expression
 }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index d819139..c5c0b10 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -59,7 +59,7 @@ public:
   void PrintImpl(raw_ostream &OS) const override;
   bool EvaluateAsRelocatableImpl(MCValue &Res,
                                  const MCAsmLayout *Layout) const override;
-  void AddValueSymbols(MCAssembler *) const override;
+  void visitUsedExpr(MCStreamer &Streamer) const override;
   const MCSection *FindAssociatedSection() const override {
     return getSubExpr()->FindAssociatedSection();
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 04d63a7..2b3855d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -427,6 +427,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
   TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
   TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
 
+  // Register the null streamer.
+  TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer);
+  TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer);
+  TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer);
+  TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer);
+
   // Register the MCInstPrinter.
   TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
   TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 8853a8c..5326e56 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -51,6 +51,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                 MCInstPrinter *InstPrint, MCCodeEmitter *CE,
                                 MCAsmBackend *TAB, bool ShowInst);
 
+MCStreamer *createARMNullStreamer(MCContext &Ctx);
+
 MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
                                         const MCRegisterInfo &MRI,
                                         const MCSubtargetInfo &STI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index ecfa4e5..186776a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -32,6 +32,7 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
                                     const MCFragment *Fragment,
                                     const MCFixup &Fixup,
                                     MCValue Target,
+                                    unsigned Type,
                                     unsigned Log2Size,
                                     uint64_t &FixedValue);
   void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
@@ -251,11 +252,11 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
                                                     const MCFragment *Fragment,
                                                     const MCFixup &Fixup,
                                                     MCValue Target,
+                                                    unsigned Type,
                                                     unsigned Log2Size,
                                                     uint64_t &FixedValue) {
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
-  unsigned Type = MachO::ARM_RELOC_VANILLA;
 
   // See <reloc.h>.
   const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -272,6 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
   uint32_t Value2 = 0;
 
   if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
     const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
 
     if (!B_SD->getFragment())
@@ -374,7 +376,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
       return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
                                               Fixup, Target, FixedValue);
     return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
-                                        Target, Log2Size, FixedValue);
+                                        Target, RelocType, Log2Size,
+                                        FixedValue);
   }
 
   // Get the symbol data, if any.
@@ -392,7 +395,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
     Offset += 1 << Log2Size;
   if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
     return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
-                                        Target, Log2Size, FixedValue);
+                                        Target, RelocType, Log2Size,
+                                        FixedValue);
 
   // See <reloc.h>.
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index e3cfb05..ad3f1ca 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,147 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 
 using namespace llvm;
-
-namespace {
-// A class to keep track of assembler-generated constant pools that are use to
-// implement the ldr-pseudo.
-class ConstantPool {
-  typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy;
-  EntryVecTy Entries;
-
-public:
-  // Initialize a new empty constant pool
-  ConstantPool() {}
-
-  // Add a new entry to the constant pool in the next slot.
-  // \param Value is the new entry to put in the constant pool.
-  //
-  // \returns a MCExpr that references the newly inserted value
-  const MCExpr *addEntry(const MCExpr *Value, MCContext &Context);
-
-  // Emit the contents of the constant pool using the provided streamer.
-  void emitEntries(MCStreamer &Streamer);
-
-  // Return true if the constant pool is empty
-  bool empty();
-};
-}
-
-namespace llvm {
-class AssemblerConstantPools {
-  // Map type used to keep track of per-Section constant pools used by the
-  // ldr-pseudo opcode. The map associates a section to its constant pool. The
-  // constant pool is a vector of (label, value) pairs. When the ldr
-  // pseudo is parsed we insert a new (label, value) pair into the constant pool
-  // for the current section and add MCSymbolRefExpr to the new label as
-  // an opcode to the ldr. After we have parsed all the user input we
-  // output the (label, value) pairs in each constant pool at the end of the
-  // section.
-  //
-  // We use the MapVector for the map type to ensure stable iteration of
-  // the sections at the end of the parse. We need to iterate over the
-  // sections in a stable order to ensure that we have print the
-  // constant pools in a deterministic order when printing an assembly
-  // file.
-  typedef MapVector<const MCSection *, ConstantPool> ConstantPoolMapTy;
-  ConstantPoolMapTy ConstantPools;
-
-public:
-  AssemblerConstantPools() {}
-  ~AssemblerConstantPools() {}
-
-  void emitAll(MCStreamer &Streamer);
-  void emitForCurrentSection(MCStreamer &Streamer);
-  const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr);
-
-private:
-  ConstantPool *getConstantPool(const MCSection *Section);
-  ConstantPool &getOrCreateConstantPool(const MCSection *Section);
-};
-}
-
-//
-// ConstantPool implementation
-//
-// Emit the contents of the constant pool using the provided streamer.
-void ConstantPool::emitEntries(MCStreamer &Streamer) {
-  if (Entries.empty())
-    return;
-  Streamer.EmitCodeAlignment(4); // align to 4-byte address
-  Streamer.EmitDataRegion(MCDR_DataRegion);
-  for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
-       I != E; ++I) {
-    Streamer.EmitLabel(I->first);
-    Streamer.EmitValue(I->second, 4);
-  }
-  Streamer.EmitDataRegion(MCDR_DataRegionEnd);
-  Entries.clear();
-}
-
-const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
-  MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
-
-  Entries.push_back(std::make_pair(CPEntryLabel, Value));
-  return MCSymbolRefExpr::Create(CPEntryLabel, Context);
-}
-
-bool ConstantPool::empty() { return Entries.empty(); }
-
-//
-// AssemblerConstantPools implementation
-//
-ConstantPool *
-AssemblerConstantPools::getConstantPool(const MCSection *Section) {
-  ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
-  if (CP == ConstantPools.end())
-    return nullptr;
-
-  return &CP->second;
-}
-
-ConstantPool &
-AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
-  return ConstantPools[Section];
-}
-
-static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
-                             ConstantPool &CP) {
-  if (!CP.empty()) {
-    Streamer.SwitchSection(Section);
-    CP.emitEntries(Streamer);
-  }
-}
-
-void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
-  // Dump contents of assembler constant pools.
-  for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
-                                   CPE = ConstantPools.end();
-       CPI != CPE; ++CPI) {
-    const MCSection *Section = CPI->first;
-    ConstantPool &CP = CPI->second;
-
-    emitConstantPool(Streamer, Section, CP);
-  }
-}
-
-void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
-  const MCSection *Section = Streamer.getCurrentSection().first;
-  if (ConstantPool *CP = getConstantPool(Section)) {
-    emitConstantPool(Streamer, Section, *CP);
-  }
-}
-
-const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
-                                               const MCExpr *Expr) {
-  const MCSection *Section = Streamer.getCurrentSection().first;
-  return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
-}
-
 //
 // ARMTargetStreamer Implemenation
 //
@@ -175,78 +40,34 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
 
 // The remaining callbacks should be handled separately by each
 // streamer.
-void ARMTargetStreamer::emitFnStart() {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFnEnd() {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitCantUnwind() {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitHandlerData() {
-  llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitFnStart() {}
+void ARMTargetStreamer::emitFnEnd() {}
+void ARMTargetStreamer::emitCantUnwind() {}
+void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {}
+void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {}
+void ARMTargetStreamer::emitHandlerData() {}
 void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
-                                       int64_t Offset) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPad(int64_t Offset) {
-  llvm_unreachable("unimplemented");
-}
-void
-ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
-                                    bool isVector) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitUnwindRaw(
-    int64_t StackOffset, const SmallVectorImpl<uint8_t> &Opcodes) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::switchVendor(StringRef Vendor) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
-  llvm_unreachable("unimplemented");
-}
+                                  int64_t Offset) {}
+void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {}
+void ARMTargetStreamer::emitPad(int64_t Offset) {}
+void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+                                    bool isVector) {}
+void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset,
+                                      const SmallVectorImpl<uint8_t> &Opcodes) {
+}
+void ARMTargetStreamer::switchVendor(StringRef Vendor) {}
+void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
 void ARMTargetStreamer::emitTextAttribute(unsigned Attribute,
-                                               StringRef String) {
-  llvm_unreachable("unimplemented");
-}
+                                          StringRef String) {}
 void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
-                                                  unsigned IntValue,
-                                                  StringRef StringValue) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitArch(unsigned Arch) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitObjectArch(unsigned Arch) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFPU(unsigned FPU) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::finishAttributeSection() {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
-  llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::AnnotateTLSDescriptorSequence(
-    const MCSymbolRefExpr *SRE) {
-  llvm_unreachable("unimplemented");
-}
+                                             unsigned IntValue,
+                                             StringRef StringValue) {}
+void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
+void ARMTargetStreamer::emitFPU(unsigned FPU) {}
+void ARMTargetStreamer::finishAttributeSection() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
+void
+ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
 
-void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
-  llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index be29dc5..baa97a7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -21,6 +21,9 @@
 
 using namespace llvm;
 
+Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
+    : ARMFrameLowering(sti) {}
+
 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
   const MachineFrameInfo *FFI = MF.getFrameInfo();
   unsigned CFSize = FFI->getMaxCallFrameSize();
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index f61874b..a227f8e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,11 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef __THUMB_FRAMEINFO_H_
-#define __THUMB_FRAMEINFO_H_
+#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_ARM_THUMB1FRAMELOWERING_H
 
 #include "ARMFrameLowering.h"
-#include "ARMSubtarget.h"
 #include "Thumb1InstrInfo.h"
 #include "Thumb1RegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
@@ -24,9 +23,7 @@ namespace llvm {
 
 class Thumb1FrameLowering : public ARMFrameLowering {
 public:
-  explicit Thumb1FrameLowering(const ARMSubtarget &sti)
-    : ARMFrameLowering(sti) {
-  }
+  explicit Thumb1FrameLowering(const ARMSubtarget &sti);
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 6267ecf..09debe7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1010,7 +1010,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   AttributeSet FnAttrs = MF.getFunction()->getAttributes();
   OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                       Attribute::OptimizeForSize);
-  MinimizeSize = STI->isMinSize();
+  MinimizeSize =
+      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
 
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());